Example #1
0
def parse_xml_plugins_list(jenkins_base_url, chunk_size, auth):
    """Print the installed Jenkins plugins as sorted ``short_name:version`` lines.

    The plugin list is requested from
    ``{jenkins_base_url}/{JENKINS_PLUGIN_MANAGER_PATH}`` with streaming
    enabled and fed to an incremental XML parser chunk by chunk.

    :param jenkins_base_url: base URL of the Jenkins instance
    :param chunk_size: size of each streamed chunk, in KiB
    :param auth: credentials passed unchanged to ``requests.get``
    :return: ``0`` on success, ``1`` when the response is not parsable XML
    """
    chunk_size = chunk_size * 1024
    xml_pull_parser = ET.XMLPullParser()
    plugins = []
    # Initialised up front so that a version tag seen before any short name
    # cannot raise NameError.
    plugin_line = ""

    with requests.get("{jenkins_base_url}/{path}".format(
            jenkins_base_url=jenkins_base_url,
            path=JENKINS_PLUGIN_MANAGER_PATH),
                      auth=auth,
                      stream=True) as jenkins_response:
        # parse the xml plugins list a chunk at a time, a very large set of
        # plugins may be installed
        for chunk in jenkins_response.iter_content(chunk_size):
            if not chunk:
                continue
            xml_pull_parser.feed(chunk)
            try:
                # Parse errors raised while feeding are queued by the pull
                # parser and surface here, when the events are read.
                for event, element in xml_pull_parser.read_events():
                    # Rely on the fact that a plugin version is always
                    # encountered after its short name in <plugin> tag
                    # children.
                    if TAGS["SHORT_NAME"] == element.tag:
                        plugin_line = element.text or ""
                    elif TAGS["VERSION"] == element.tag:
                        # An empty element has text None; treat it as "".
                        plugins.append(
                            plugin_line + ":" + (element.text or ""))
                        plugin_line = ""
            except ET.ParseError as parse_err:
                print(
                    "Jenkins response is not in parsable XML format, "
                    "check your access rights to the instance: {parse_err}"
                    .format(parse_err=parse_err))
                return 1
    for plugin in sorted(plugins):
        print(plugin)
    return 0
Example #2
0
 def init_parser(self):
     """Create a fresh XML pull parser and reset the parsing state.

     Must be called for every new connection so that no state from a
     previous stream is carried over.
     """
     self.xml_depth, self.xml_root = 0, None
     # Report both the opening and the closing of every element.
     reported_events = ("start", "end")
     self.parser = ET.XMLPullParser(reported_events)
Example #3
0
def parseBytes(bString):
    """Build a ``CstaMessage`` from a raw CSTA frame.

    The first 8 bytes of ``bString`` are kept as the header. The rest is
    stripped, decoded with the encoding named in the XML declaration and
    parsed as XML. Namespace declarations found in the body are collected
    and passed on as ``ns`` when there are any.

    :param bString: the complete frame as ``bytes``
    :return: the ``CstaMessage`` built from header, tree and body
    :raises AttributeError: if no ``encoding="..."`` declaration is found
        (``re.search`` returns ``None``)
    """
    # Raw bytes literal: "\S", "\?" and "\>" are regex escapes, and are
    # invalid escape sequences in a non-raw literal.
    xml_encoding = re.search(rb"encoding=['\"](\S*)['\"].* ?\?\>",
                             bString).group(1)
    encoding = xml_encoding.decode(encoding=ENCODING)
    header = bString[:8]
    try:
        body = bString[8:].strip().decode(encoding)
    except Exception:
        # Dump the offending frame for diagnosis, then let the error through.
        print("Cannot decode CSTA message with ", encoding)
        print(bString)
        raise
    try:
        root = ET.fromstring(body)
    except Exception:
        print("Cannot parse CSTA message string:")
        print(header)
        print(body)
        raise
    tree = ET.ElementTree(root)
    # A second pass with a pull parser collects the (prefix, uri) pairs of
    # every namespace declaration in the body.
    XMLParser = ET.XMLPullParser(events=['start-ns'])
    XMLParser.feed(body)
    ns = [e[1] for e in XMLParser.read_events()]
    if ns:
        cstamessage = CstaMessage(header, tree, body, encoding=encoding, ns=ns)
    else:
        cstamessage = CstaMessage(header, tree, body, encoding=encoding)
    return cstamessage
def read(source_file_name):
	"""Read ways and relations from an XML file of node/way/relation elements.

	Each ``node`` contributes a ``[lon, lat]`` pair. Each ``way`` collects the
	coordinates of its ``nd`` references and its ``tag`` key/value pairs. Each
	``relation`` collects its tags and the coordinate lists of its ``way``
	members with role ``outer`` or ``inner``.

	:param source_file_name: path of the UTF-8 encoded XML file
	:return: ``(ways.values(), relations.values())``
	"""
	nodes = {}
	ways = {}
	relations = {}
	way = None
	relation = None
	# The way or relation currently receiving <tag> children, or None while
	# inside a node.
	element = None
	parser = ET.XMLPullParser(['start'])
	with open(source_file_name, "r", encoding='utf8') as source_file:
		parser.feed(source_file.read())
	for event, elem in parser.read_events():
		if elem.tag == 'node':
			nodes[elem.get('id')] = [float(elem.get('lon')), float(elem.get('lat'))]
			# Tags of a node are not collected. Without this reset they would
			# crash on None or be attributed to the previous way/relation.
			element = None
		elif elem.tag == 'way':
			element = way = ways[elem.get('id')] = Way()
		elif elem.tag == 'relation':
			element = relation = relations[elem.get('id')] = Relation()
		elif elem.tag == 'tag':
			if element is not None:
				element.tags.append([elem.get('k'), elem.get('v')])
		elif elem.tag == 'nd':
			way.coords.append(nodes[elem.get('ref')])
		elif elem.tag == 'member':
			if elem.get('type') == 'way':
				coords = ways[elem.get('ref')].coords
				if elem.get('role') == 'outer':
					relation.outer.append(coords)
				elif elem.get('role') == 'inner':
					relation.inner.append(coords)
	return ways.values(), relations.values()
Example #5
0
def _verify_all_tags_closed(xml_text: str) -> Optional[str]:
    """
    Verify that all the tags were properly closed in the XML given as text.

    Return error if any.
    """
    parser = ET.XMLPullParser(["start", "end"])
    parser.feed(xml_text.encode("utf-8"))

    open_tags = []  # type: List[ET.Element]

    iterator = parser.read_events()
    while True:
        try:
            event, element = next(iterator)
        except StopIteration:
            break
        except ET.ParseError as exception:
            lineno, _ = exception.position
            line = xml_text.splitlines()[lineno - 1]

            if exception.msg.startswith("mismatched tag:"):
                return (
                    f"{exception.msg}; the line was: {line!r}, "
                    f"the open tag(s) up to that point: "
                    f"{list(map(rasaeco.et.to_str, open_tags))}. "
                    f"Did you maybe forget to close the tag "
                    f"{rasaeco.et.to_str(open_tags[-1])}? "
                    f"See also https://github.com/mristin/rasaeco#known-issues "
                    f"in case you have missing or too many new lines.")
            else:
                return f"{exception.msg}; the line was: {line!r}"

        if event == "start":
            open_tags.append(element)
        elif event == "end":
            if len(open_tags) == 0:
                return (f"Unexpected closing tag "
                        f"{rasaeco.et.to_str(element)} and no open tags")

            elif open_tags[-1].tag != element.tag:
                return (f"Unexpected closing tag "
                        f"{rasaeco.et.to_str(element)} as the last opened "
                        f"tag was: {rasaeco.et.to_str(open_tags[-1])}")

            elif open_tags[-1].tag == element.tag:
                open_tags.pop()

            else:
                raise AssertionError(
                    f"Unhandled case: "
                    f"element.tag is {rasaeco.et.to_str(element)}, "
                    f"event: {event}, "
                    f"open tags: "
                    f"{list(map(rasaeco.et.to_str, open_tags))}")
        else:
            raise AssertionError(f"Unhandled event: {event}")

    return None
Example #6
0
def load_errortypes_xml(file):
    """Feed lines of a binary stream to a pull parser up to the first blank line.

    Lines are read with ``file.readline()``, decoded and fed to the parser.
    Reading stops after the first line that is empty or whitespace-only,
    which includes end of file.

    :param file: binary file-like object
    :return: the ``XMLPullParser`` (start/end events) holding the fed data
    """
    pull_parser = ET.XMLPullParser(['start', 'end'])
    keep_reading = True
    while keep_reading:
        text = file.readline().decode()
        pull_parser.feed(text)
        # A blank line, or "" at end of file, terminates the block.
        keep_reading = bool(text.strip())
    return pull_parser
Example #7
0
 def __init__(self):
     """Set up the decompressor, the XML pull parser and the parsing state."""
     # Package state must persist across `feed()` calls, since a
     # package element may straddle a chunk boundary.
     self._package = {}
     # ElementTree mangles the tags thus: '{xml_namespace}tag_name'; the
     # first group takes the optional namespace, the second the bare name.
     self.tag_re = re.compile(
         '({[^}]+}|)(location|size|checksum|package|time)$')
     # Only completed elements are of interest.
     self.xml_parser = ElementTree.XMLPullParser(['end'])
     # MAX_WBITS + 16 selects the gzip container format.
     self.decompressor = zlib.decompressobj(wbits=16 + zlib.MAX_WBITS)
Example #8
0
def valgrind_output_xml(pipe_r):
    """Extract the valgrind output XML from a pipe file descriptor.

    The pipe is read line by line and fed to a pull parser. The
    ``valgrindoutput`` element is returned as soon as it has been parsed
    completely. The descriptor itself is left open.

    :param pipe_r: readable file descriptor
    :return: the ``valgrindoutput`` element, or ``None`` if the stream ends
        before one is completed
    """
    pull_parser = XmlElementTree.XMLPullParser()
    with os.fdopen(pipe_r, 'r', closefd=False) as stream:
        for text in stream:
            pull_parser.feed(text)
            for _, node in pull_parser.read_events():
                if node.tag == 'valgrindoutput':
                    return node
    return None
Example #9
0
def pullParse(data):
    """Print every start/end event of ``data`` together with the element text.

    For an element whose text is ``None`` only the event is printed. Text
    made up solely of spaces and newlines is reported as ``NONE``.

    :param data: XML document (or fragment) to feed to the pull parser
    """
    pull_parser = ET.XMLPullParser(['start', 'end'])
    pull_parser.feed(data)
    for kind, node in pull_parser.read_events():
        print('Event: ', kind)
        if node.text is None:
            continue
        # Only spaces and newlines are stripped for the emptiness check.
        squeezed = node.text.replace('\n', '').replace(' ', '')
        if squeezed:
            print('Tag:', node.tag, ", text:", node.text)
        else:
            print('Tag:', node.tag, ", text: NONE")
Example #10
0
    def clearParser(self):
        """Replace the XML parser with a fresh one primed with a ``<root>`` tag.

        The events produced by the opening tag are consumed, so the first
        event read afterwards belongs to data fed later.

        :return: success for test purpose
        """
        fresh_parser = ETree.XMLPullParser(['start', 'end'])
        self.parser = fresh_parser
        fresh_parser.feed('<root>')
        # Drain the event queue of the parser.
        list(fresh_parser.read_events())
        return True
Example #11
0
def read_tin_xml_ET(fn):
    """
    Parse a TIN from a LandXML file.
    Returns two arrays:
      P: points [:,3] doubles
      F: faces [:,3]  integers

    This version uses a proper XML parser, for robust but slow parsing.

    Only handles a single TIN per file.

    Both arrays grow by doubling and are returned untrimmed: rows of ``P``
    that were never assigned are NaN, and rows of ``F`` beyond the last face
    read are -1. Points are indexed by their ``id`` attribute, faces by the
    order in which they are read.
    """
    Ps = np.nan * np.zeros((1, 3), np.float64)
    Fs = -1 * np.ones((1, 3), np.int32)
    Fcount = 0

    parser = ET.XMLPullParser(['start', 'end'])

    point_tag = "{http://www.landxml.org/schema/LandXML-1.2}P"
    face_tag = "{http://www.landxml.org/schema/LandXML-1.2}F"

    tag_types = {}
    blksize = 10000
    n_blks = 1 + int(os.stat(fn).st_size / blksize)
    with open(fn, 'rt') as fp:
        for _ in utils.progress(range(n_blks)):
            buff = fp.read(blksize)
            if len(buff) == 0:
                break
            parser.feed(buff)
            for event, elem in parser.read_events():
                if elem.tag not in tag_types:
                    # Report each tag the first time it is seen.
                    print(elem.tag, 'text=', elem.text)
                    tag_types[elem.tag] = elem
                # Handle each element once, when it is complete. Events are
                # read after the whole chunk has been parsed, so an element
                # that fits in one chunk already has its text at the "start"
                # event and would otherwise be processed twice.
                if event != 'end':
                    continue
                if elem.text is None:
                    continue
                if elem.tag == point_tag:
                    pid = int(elem.attrib['id'])
                    P = [float(s) for s in elem.text.split()]
                    # Appears that these are written lat/long order, but I prefer
                    # to keep everything x/y
                    P[0], P[1] = P[1], P[0]
                    while len(Ps) < pid + 1:
                        Ps = np.concatenate([Ps, np.nan * Ps], axis=0)
                    Ps[pid] = P
                elif elem.tag == face_tag:
                    F = [int(s) for s in elem.text.split()]
                    fid = Fcount
                    while fid + 1 > len(Fs):
                        # Pad with -1; "-1 * Fs" would append negated copies
                        # of the faces already stored.
                        Fs = np.concatenate([Fs, np.full_like(Fs, -1)],
                                            axis=0)
                    Fs[fid] = F
                    Fcount += 1

    return Ps, Fs
Example #12
0
def parse_send(line, parsers):
    """Parse one ``Send:<time>:<id>:<xml>`` log line.

    A new pull parser is stored for the identifier when none exists yet or
    when the payload contains an XML declaration.

    :param line: the log line
    :param parsers: dict mapping identifier to its pull parser; updated in
        place
    :return: ``(identifier, [(time, item), ...])`` with one entry per item
        returned by ``parse_xml``
    """
    fields = line.split(":")
    timestamp = float(fields[1])
    # The identifier is wrapped in one delimiter character on each side.
    identifier = fields[2][1:-1]
    # The XML itself may contain colons, so re-join everything after the id.
    payload = ":".join(fields[3:])
    if "<?xml" in payload or identifier not in parsers:
        parsers[identifier] = ET.XMLPullParser(["start", "end"])
    items = parse_xml(payload, parsers[identifier], False)
    return identifier, [(timestamp, item) for item in items]
Example #13
0
def getpages(bz2data):
    """Yield ``(page_id, text_stream)`` for every namespace-0 page in a bz2 block.

    The decompressed XML is wrapped in an opening ``<pages>`` tag so that a
    sequence of sibling elements parses as one document. The first ``id``
    element following an ``ns`` element is taken as the page id.

    :param bz2data: bz2-compressed, UTF-8 encoded XML
    :return: generator of ``(id, io.StringIO)``; each stream is closed when
        the generator is resumed
    """
    document = bz2.decompress(bz2data).decode("utf-8")
    pull_parser = ET.XMLPullParser()
    for piece in ("<pages>", document):
        pull_parser.feed(piece)
    namespace = page_id = 0
    for _, node in pull_parser.read_events():
        tag = node.tag
        if tag == "ns":
            # A new page begins: forget the previous page id.
            namespace, page_id = int(node.text), 0
        elif tag == "id" and page_id == 0:
            page_id = int(node.text)
        elif tag == "text" and namespace == 0:
            with io.StringIO(node.text) as text:
                yield page_id, text
Example #14
0
def parse_recv(lines, parsers):
    """Parse one ``Recv:<time>:<id>:<xml>`` log entry spanning several lines.

    The first line carries time, identifier and the start of the XML; the
    remaining lines are appended to the XML unchanged. A new pull parser is
    stored for the identifier when none exists yet or when the first line's
    payload contains an XML declaration.

    :param lines: the lines of the entry, header line first
    :param parsers: dict mapping identifier to its pull parser; updated in
        place
    :return: ``(identifier, [(time, item), ...])`` with one entry per item
        returned by ``parse_xml``
    """
    fields = lines[0].split(":")
    timestamp = float(fields[1])
    # The identifier is wrapped in one delimiter character on each side.
    identifier = fields[2][1:-1]
    # The XML itself may contain colons, so re-join everything after the id.
    first_payload = ":".join(fields[3:])
    if "<?xml" in first_payload or identifier not in parsers:
        parsers[identifier] = ET.XMLPullParser(["start", "end"])
    payload = first_payload + "".join(lines[1:])
    items = parse_xml(payload, parsers[identifier], True)
    return identifier, [(timestamp, item) for item in items]
Example #15
0
def _read_until_title_info(book_file):
    """Return the decoded lines of *book_file* up to the one closing ``title-info``."""
    lines = []
    # readline() returns b'' at end of file; stopping there prevents an
    # endless loop when the closing tag never appears.
    for raw_line in iter(book_file.readline, b''):
        line = raw_line.decode('utf-8')
        lines.append(line)
        if '</title-info>' in line:
            break
    return ''.join(lines)


def parse_book(book_path):
    """Extract title, author and year from the ``title-info`` of an FB2 book.

    Paths ending in ``fb2`` are parsed directly. ``.zip`` archives (first
    member) and ``.gz`` files are read only up to the line that closes
    ``title-info``.

    :param book_path: path of the book file
    :return: ``((title, author, year), book_path)``; each of the three is
        ``None`` when not found
    :raises ValueError: if the extension is not supported or the file has
        no ``title-info`` element
    """
    root = None
    if book_path.endswith('fb2'):
        # Opening the file here guarantees it is closed after the early break.
        with open(book_path, 'rb') as fb2_file:
            for event, elem in ET.iterparse(fb2_file,
                                            events=('end', 'end-ns')):
                # "end-ns" events carry None instead of an element. Compare
                # with None explicitly: Element truth-testing is deprecated
                # and is False for an element without children.
                if elem is not None and elem.tag.endswith('title-info'):
                    root = elem
                    break
    else:
        if book_path.endswith('.zip'):
            with zipfile.ZipFile(book_path) as archive:
                with archive.open(archive.namelist()[0]) as book_file:
                    text = _read_until_title_info(book_file)
        elif book_path.endswith('.gz'):
            with gzip.open(book_path, 'r') as book_file:
                text = _read_until_title_info(book_file)
        else:
            raise ValueError(
                "Unsupported book file type: {}".format(book_path))

        parser = ET.XMLPullParser(['start'])
        parser.feed(text)
        for event, elem in parser.read_events():
            if elem is not None and elem.tag.endswith('title-info'):
                root = elem

    if root is None:
        raise ValueError("No title-info found in {}".format(book_path))

    def find_tags(elem, title, author, year):
        """Search *elem* depth-first, filling in whichever value is still missing."""
        tag = elem.tag.lower()
        if not title and 'book-title' in tag:
            title = elem.text
            return title, author, year
        if not author and 'author' in tag:
            # Join the non-empty texts of the first two children.
            author = ' '.join(
                child.text for child in elem[:2] if child.text)
            return title, author, year
        if not year and 'date' in tag:
            year = elem.attrib['value'] if 'value' in elem.attrib else elem.text
            return title, author, year

        for child in elem:
            if not all([title, author, year]):
                title, author, year = find_tags(child, title, author, year)
        return title, author, year

    title, author, year = find_tags(root, None, None, None)
    if year:
        parsed_date = dateparser.parse(year)
        # dateparser.parse returns None for text it cannot interpret.
        year = parsed_date.year if parsed_date else None
    else:
        year = None
    return (title, author, year), book_path
Example #16
0
def find_all_tags(fp, tags, progress_callback=None):
    """Yield ``(tag, element)`` for every completed element whose tag is in ``tags``.

    ``fp`` is read in 1 MiB pieces and parsed incrementally. After each
    event the root element is cleared so that already-handled children do
    not pile up under it.

    :param fp: file-like object to read the XML from
    :param tags: container of tag names to report
    :param progress_callback: optional callable, invoked with the length of
        each piece after its events have been handled
    """
    pull_parser = ET.XMLPullParser(("start", "end"))
    document_root = None
    piece = fp.read(1024 * 1024)
    while piece:
        pull_parser.feed(piece)
        for kind, node in pull_parser.read_events():
            # The very first start event belongs to the document root.
            if document_root is None and kind == "start":
                document_root = node
            if kind == "end" and node.tag in tags:
                yield node.tag, node
            document_root.clear()
        if progress_callback is not None:
            progress_callback(len(piece))
        piece = fp.read(1024 * 1024)
Example #17
0
 def parse(self, spec: str) -> dict:
     """
     Parse the given spec incrementally.

     The file is opened in binary mode and handed to ``self.read_spec``;
     every piece of data it produces is fed to a pull parser. Each completed
     element whose tag is in ``self.operations`` and which carries an ``id``
     attribute is serialised and passed to ``self.process_node``.

     :param spec: path to the XML spec file
     :return: a dictionary of id -> operations
     """
     operations_by_id = {}
     with open(spec, 'rb') as spec_file:
         pull_parser = ET.XMLPullParser(['end'])
         # Flatten the chunks lazily so that data is fed in reading order.
         pieces = (piece
                   for chunk in self.read_spec(spec_file)
                   for piece in chunk)
         for piece in pieces:
             pull_parser.feed(piece)
             for _, node in pull_parser.read_events():
                 if node.tag not in self.operations:
                     continue
                 if 'id' not in node.attrib:
                     continue
                 serialised = ET.tostring(node, encoding='unicode')
                 operations_by_id[node.attrib['id']] = (
                     self.process_node(serialised))
     return operations_by_id
Example #18
0
    def _get_bible_name(filename, hint_line=10):
        """Check whether it is a XML file with following xml tag and attributes.
        <XMLBIBLE biblename="King James 2000" type="x-bible">

        return biblename if it is a valid bible.
        """
        lines = ""
        with open(filename, "r", encoding="utf-8") as f:
            for i in range(hint_line):
                lines += f.readline()

        parser = ET.XMLPullParser(["start", "end"])
        parser.feed(lines)
        for event, elem in parser.read_events():
            if event == "start" and elem.tag == "XMLBIBLE" and "biblename" in elem.attrib:
                return elem.attrib["biblename"]

            return None
Example #19
0
def xml_parse_repodata(repodata_path, element_tag, repodata_type):
    """Parse a repodata file incrementally into a ``Metadata`` object.

    The file is read through ``decompression_iter``, keyed by the file
    extension without its dot. Every completed element whose tag equals
    ``element_tag`` is converted by the parser matching ``repodata_type``
    and appended under its checksum.

    :param repodata_path: path of the repodata file
    :param element_tag: tag of the elements to convert
    :param repodata_type: one of ``"primary"``, ``"filelists"``, ``"other"``
        or ``"repomd"``
    :return: the populated ``Metadata`` object
    """
    extension = os.path.splitext(repodata_path)[1]
    chunks = decompression_iter(repodata_path, extension[1:])

    element_parsers = {
        "primary": parse_primary_pkg_elem,
        "filelists": parse_filelists_pkg_elem,
        "other": parse_other_pkg_elem,
        "repomd": parse_repomd_item_elem,
    }
    # For an unknown type the name stays unbound, so the failure only shows
    # up once a matching element is found.
    if repodata_type in element_parsers:
        parse_pkg_elem = element_parsers[repodata_type]

    pull_parser = ET.XMLPullParser(['end'])
    metadata_obj = Metadata(repodata_path)
    for xml_data in chunks:
        pull_parser.feed(xml_data)
        for kind, node in pull_parser.read_events():
            if kind != "end" or node.tag != element_tag:
                continue
            parsed = parse_pkg_elem(node)
            metadata_obj.append(parsed.checksum, parsed)
    return metadata_obj
Example #20
0
def depth(elem, level):
    """Store the deepest nesting level of ``elem`` in the global ``maxdepth``.

    The element is serialised and re-parsed with a pull parser. Every start
    event goes one level deeper, every end event one level back up.
    ``maxdepth`` starts out as ``level`` and is raised whenever the running
    level exceeds it.

    :param elem: root element of the tree to measure
    :param level: level of the parent of ``elem``
    """
    global maxdepth
    maxdepth = level
    walker = etree.XMLPullParser(['start', 'end', 'start-ns', 'end-ns'])
    walker.feed(etree.tostring(elem))
    for kind, _ in walker.read_events():
        if kind == 'start':
            level += 1
            maxdepth = max(maxdepth, level)
        elif kind == 'end':
            level -= 1
Example #21
0
def sent2iob(sent, format="c", tag_list=None, unk_expand=False, bert=False):
    """Convert a sentence with inline XML entity tags into (token, IOB label) pairs.

    The sentence is wrapped in ``<sent>`` and parsed. The text inside a tag
    is labelled with the tag name, everything else with ``"O"``. The plain
    text is then tokenised and each token gets its label, prefixed with
    ``B-`` or ``I-`` for entity tokens.

    :param sent: sentence, optionally containing ``<TAG>...</TAG>`` markup
    :param format: ``"c"`` for one token per character, ``"w"`` for MeCab
        word segmentation, or a callable tokeniser applied to the plain text
    :param tag_list: tags to keep; others are labelled ``"O"``. ``None``
        keeps every tag
    :param unk_expand: remove spaces from ``sent``; with a callable
        ``format``, expect it to return ``(tokens, lengths)``
    :param bert: remove spaces from ``sent``
    :return: list of ``(token, label)`` tuples
    """
    if unk_expand or bert:
        sent = sent.replace(' ', '')
    pull_parser = ET.XMLPullParser(['start', 'end'])
    pull_parser.feed('<sent>' + sent + '</sent>')

    plain = ""
    labels = []
    open_tags = set()
    print(sent)
    for kind, node in pull_parser.read_events():
        selected = tag_list is None or node.tag in tag_list
        is_entity = node.tag != "sent" and selected
        if kind == "start":
            assert len(open_tags) < 2, "タグが入れ子になっています\n{}".format(sent)
            # Text directly after the opening tag belongs to this element.
            piece = node.text or ""
            plain += piece
            if is_entity:
                open_tags.add(node.tag)
                labels.extend([node.tag] * len(piece))
            else:
                labels.extend(["O"] * len(piece))
        elif kind == "end":
            if is_entity:
                open_tags.remove(node.tag)
            # The tail follows the closing tag and is therefore outside it.
            piece = node.tail or ""
            plain += piece
            labels.extend(["O"] * len(piece))

    if format == "c":
        tokens = list(plain)
        widths = [len(token) for token in tokens]
    elif format == "w":
        tagger = MeCab.Tagger('-Owakati')
        tokens = tagger.parse(plain)[:-1].split(' ')[:-1]
        widths = [len(token) for token in tokens]
    elif unk_expand:
        tokens, widths = format(plain)
    else:
        tokens = format(plain)
        widths = [1 for _ in tokens]

    position = 0
    tagged = []
    previous = "O"
    for token, width in zip(tokens, widths):
        if len(labels) <= position:
            # Ran out of labels: emit this token as "O" and stop.
            tagged.append((token, "O"))
            break
        assert len(set(
            labels[position:position +
                   width])) == 1, "形態素とラベルが食い違っています\n{2}\n{0} : {1}".format(
                       token, labels[position:position + len(token)], tokens)

        current = labels[position]
        if current == "O":
            prefix = ""
        elif previous == current:
            prefix = "I-"
        else:
            prefix = "B-"
        previous = current

        tagged.append((token, prefix + current))
        position += width

    return tagged
    name = actor.find('{http://people.example.com}name')
    print(name.text)
    for char in actor.findall('{http://characters.example.com}character'):
        print(' |-->', char.text)

# Map short prefixes to namespace URIs so that the searches below can use
# "prefix:tag" instead of spelling out "{uri}tag".
ns = {'real_person': 'http://people.example.com',
      'role': 'http://characters.example.com'}

# Print the name of every actor followed by the characters listed under it.
# NOTE(review): `root` is not defined in this excerpt; it is presumably the
# parsed document from an earlier part of the script. Confirm.
for actor in root.findall('real_person:actor', ns):
    name = actor.find('real_person:name', ns)
    print(name.text)
    for char in actor.findall('role:character', ns):
        print(' |-->', char.text)

print("-------------------- XMLPullParser")
# Incremental parsing: feed the document in two pieces and print whatever
# events are available after each piece.
parser = ET.XMLPullParser(['start', 'end'])
parser.feed('<mytag>sometext')
print(list(parser.read_events()))

parser.feed(' more text</mytag>')
for event, elem in parser.read_events():
    print(event)
    print(elem.tag, 'text=', elem.text)

print("----------- XPath")
# NOTE(review): ET is already used above, so this import would normally sit
# at the top of the file.
import xml.etree.ElementTree as ET

# NOTE(review): `xml` is not defined in this excerpt; presumably a string
# holding the document to parse. Confirm.
root = ET.fromstring(xml)

# Top-level elements
print(root.findall("."))
Example #23
0
def collect_data(FILE='map.xml', csv_output=True, max_size=1000000):
    '''
    This function collects all required data out of an OpenStreetMap-like XML-file **FILE**.
    (correct path to it) and returns the following:
        - **bounds** dictionary, where keys are 'maxlat', 'maxlon', 'minlat' and 'minlon'
        - **cameras** dictionary in form {..., node_id: [node_lat, node_lon], ... } if node_id refers to a camera
        - **street_nodes** in form {..., node_id: [node_lat, node_lon], ... }} if node_id refers to a street defining node
        - **streets** dictionary in form {..., street_id: [type, is_oneway, postal_code, name, node1, node2, ..], ... }
        - **postal_areas** dictionary in form {..., postal_code: [way1, way2, way3, ..], ... }
        - **area_nodes** dictionary in form {..., node_id: [node_lat, node_lon], ... } if node_id lies on a postal area border
        - **area_lats** dictionary in form {..., postal_code: [node1_lat, node2_lat, ..], ... }
        - **area_lons** dictionary in form {..., postal_code: [node1_lon, node2_lon, ..], ... }
    It parses the XML-code incrementally in character bunches of length **max_size**.
    If csv output is wanted, it creates a new directory csv_%time next to **FILE** containing the files bounds.csv,
    cameras.csv, street_nodes.csv, streets.csv, areas.csv, area_nodes.csv, area_lats.csv, area_lons.csv.

    If a MemoryError occurs, a message is printed and None is returned.
    '''

    # init return Data-Types
    bounds = {}
    cameras = {}
    street_nodes = {}
    streets = {}
    postal_areas = {}
    area_lats = {}
    area_lons = {}

    # init other Data containers
    nodes = {
    }  # collect all elements with tag 'node' here, dict of form {..., node_id: [node_lat, node_lon], ...}
    ways = {
    }  # collect all elements with tag 'way' here, dict of form {..., way_id: [node1, node2, ..], ...}
    street_nodes_set = set(
    )  # collect all node_ids that define any street in a set
    area_ways_set = set(
    )  # collect all way_ids that define any postal area border in a set

    # get current time
    time_now = time.strftime('%d.%m.%Y_%H.%M.%S')

    # init parser
    parser = et.XMLPullParser(['start', 'end'])

    try:
        if csv_output:
            # set paths of the output files
            head, tail = os.path.split(FILE)
            out_dir = os.path.join(head, 'csv_%s' % time_now)
            print('outdir:\t %s' % out_dir)
            if not os.path.isdir(out_dir):
                os.mkdir(out_dir)
            csv_files = map(lambda x: os.path.join(out_dir, x), [
                'bounds.csv', 'cameras.csv', 'street_nodes.csv', 'streets.csv',
                'areas.csv', 'area_nodes.csv', 'area_lats.csv', 'area_lons.csv'
            ])
        else:
            # Nothing to open; an empty list keeps the loop below valid
            # (iterating over None raised TypeError).
            csv_files = []
        with open(FILE, 'r') as read_file:
            for csv_file in csv_files:
                # open files to write
                head, tail = os.path.split(csv_file)
                if tail == 'bounds.csv':
                    bound_file = open(csv_file, 'w')
                if tail == 'cameras.csv':
                    camera_file = open(csv_file, 'w')
                if tail == 'street_nodes.csv':
                    street_node_file = open(csv_file, 'w')
                if tail == 'streets.csv':
                    street_file = open(csv_file, 'w')
                if tail == 'areas.csv':
                    area_file = open(csv_file, 'w')
                if tail == 'area_nodes.csv':
                    area_node_file = open(csv_file, 'w')
                if tail == 'area_lats.csv':
                    area_lat_file = open(csv_file, 'w')
                if tail == 'area_lons.csv':
                    area_lon_file = open(csv_file, 'w')

            if csv_output:
                # init csv files to write
                bounds_csv = csv.writer(bound_file, delimiter=',')
                camera_csv = csv.writer(camera_file, delimiter=',')
                street_node_csv = csv.writer(street_node_file, delimiter=',')
                street_csv = csv.writer(street_file, delimiter=',')
                area_csv = csv.writer(area_file, delimiter=',')
                area_node_csv = csv.writer(area_node_file, delimiter=',')
                area_lat_csv = csv.writer(area_lat_file, delimiter=',')
                area_lon_csv = csv.writer(area_lon_file, delimiter=',')

            # init parsing variables
            root = None
            relevancy_level = 0
            elements_to_delete = []
            case_dict = None

            # Start with parsing...
            rep = 0
            while True:
                test_func = None
                rep += 1

                line = read_file.read(max_size)
                if not line:
                    break

                # feed the parser
                parser.feed(line)

                # iterate through all parsed elements
                for event, elem in parser.read_events():

                    # the first element ever seen is the document root
                    if root is None:
                        root = elem

                    # get bounds of the given map excerpt
                    if elem.tag == 'bounds':
                        if event == "end":
                            output = []
                            for key in [
                                    'minlat', 'minlon', 'maxlat', 'maxlon'
                            ]:
                                bounds[key] = elem.attrib[key]
                                test_func = None
                                output.append(elem.attrib[key])
                            if csv_output:
                                bounds_csv.writerow(list(output))
                                bound_file.close()
                    # process all node tags with _get_camera
                    if elem.tag == 'node':
                        test_func = _get_camera
                        case_dict = cameras
                    # process all way tags with _get_street
                    if elem.tag == 'way':
                        test_func = _get_street
                        case_dict = streets
                    # process all relation tags with _get_relation
                    if elem.tag == 'relation':
                        test_func = _get_relation
                        case_dict = postal_areas

                    # test_func keeps its last value for child elements, so
                    # children of a node/way/relation are handled with their
                    # parent's function
                    if not test_func:
                        continue

                    if event == "start":
                        if test_func(elem):
                            # in case of an opening tag:
                            # increase level if elem consists of required data (otherwise/ on level 0, elem would deleted before having read all its children)
                            relevancy_level += 1
                    if event == "end":
                        # in case of an ending tag:
                        # have processed complete element, so add it to list of elements to delete
                        elements_to_delete.append(elem)
                        if test_func(elem):
                            # decrease level if elem consists of required data (finally only on level 0, i. e. when all its child tags are read, it will be freed to delete)
                            relevancy_level -= 1
                            # get output of test_func
                            output = test_func(elem)
                            # in case of a node tag: get node coords and if it refers to a camera add it to the cameras dict
                            if case_dict == cameras:
                                is_cam = output[0]
                                id = output[1]
                                coordinates = output[2:]
                                nodes[id] = coordinates
                                if is_cam:
                                    assert (id not in cameras)
                                    cameras[id] = coordinates
                                    if csv_output:
                                        camera_csv.writerow(list(output[1:]))
                            # in case of a way tag:
                            elif case_dict == streets:
                                is_highway = output[0]
                                id = output[1]
                                info = output[2:6]
                                way_nodes = output[6:]
                                assert (id not in streets)
                                # only if way refers to a street add it to the streets dict
                                if is_highway:
                                    streets[id] = info + way_nodes
                                    # collect all nodes which are defining a street in the street_nodes set
                                    street_nodes_set.update(set(way_nodes))
                                    if csv_output:
                                        street_csv.writerow(list(output[1:]))
                                # otherwise save only its nodes in ways dict
                                ways[id] = way_nodes
                            # in case of a relation tag: get the bounding ways (ids) of an postal area and save it to the postal_areas dict
                            elif case_dict == postal_areas:
                                postal_code = output[0]
                                postal_ways = output[1:]
                                assert (postal_code not in postal_areas)
                                postal_areas[postal_code] = postal_ways
                                # collect all ways which are defining a postal area in the area_ways set
                                area_ways_set.update(set(postal_ways))
                                if csv_output:
                                    area_csv.writerow(list(output))

                # delete elements only when we parsed them completely (including all its children)
                if relevancy_level == 0:
                    for elem in elements_to_delete:
                        elem.clear()
                        if elem is not root:
                            root.clear()
                    elements_to_delete.clear()

        # save only coordinates of those nodes that are part of a street (and optional save it to file)
        street_nodes = {
            node_id: nodes[node_id]
            for node_id in street_nodes_set
        }
        street_nodes_set.clear()
        if csv_output:
            for (node_id, coords) in street_nodes.items():
                street_node_csv.writerow([node_id] + list(coords))

        # save only coordinates of those nodes that are defining a postal area boundary (and optional save it to file)
        area_ways = {
            way_id: ways[way_id]
            for way_id in area_ways_set if way_id in ways
        }
        area_ways_set.clear()

        # get all nodes that define some postal area in a set
        area_nodes_set = set()
        for node_list in area_ways.values():
            area_nodes_set.update(node_list)
        area_ways.clear()

        # collect coordinates to all area nodes in dict
        area_nodes = {
            node_id: nodes[node_id]
            for node_id in area_nodes_set if node_id in nodes
        }
        area_nodes_set.clear()
        if csv_output:
            for (node_id, coords) in area_nodes.items():
                area_node_csv.writerow([node_id] + list(coords))

        # for each postal_code area save all its defining node coords in two dicts (separately for all its lat resp. lon coordinates)
        # form will be e. g. area_lats = { ..., postal_code1: [node1_lat, node2_lat, ..], ...}
        for postal_code in postal_areas.keys():
            print('\nNew Postal code: %s' % postal_code)
            postal_node_lats = []
            postal_node_lons = []
            for way_id in postal_areas[postal_code]:
                if way_id in ways:
                    way_lats = []
                    way_lons = []
                    for node_id in ways[way_id]:
                        if node_id in nodes:
                            node_lat, node_lon = nodes[node_id]
                            # np.float64 instead of the np.float_ alias,
                            # which was removed in NumPy 2.0
                            way_lats.append(np.float64(node_lat))
                            way_lons.append(np.float64(node_lon))
                        else:
                            print('\t\tNode %s NOT in ways[%s].' %
                                  (node_id, way_id))
                    postal_node_lats = list(postal_node_lats) + list(way_lats)
                    postal_node_lons = list(postal_node_lons) + list(way_lons)
            area_lats[postal_code] = postal_node_lats
            area_lons[postal_code] = postal_node_lons

            if csv_output:
                area_lat_csv.writerow([postal_code] +
                                      list(area_lats[postal_code]))
                area_lon_csv.writerow([postal_code] +
                                      list(area_lons[postal_code]))

        # close all opened files
        if csv_output:
            camera_file.close()
            street_node_file.close()
            street_file.close()
            area_file.close()
            area_node_file.close()
            area_lat_file.close()
            area_lon_file.close()

        # return required data
        return bounds, cameras, street_nodes, streets, postal_areas, area_nodes, area_lats, area_lons

    except MemoryError:
        print('Out of Memory.')
def reset_pullParser():
    """Discard the current module-level pull parser and install a fresh one.

    Rebinds the global ``pullParser`` to a new ``ET.XMLPullParser`` that
    reports "start" and "end" events, and sets the global ``first`` flag
    back to ``True``.
    """
    global pullParser, first
    # Drop the reference to the previous parser before building its
    # replacement.
    pullParser = None
    pullParser = ET.XMLPullParser(events=("start", "end"))
    first = True
Example #25
0
def extract_glade(fileobj, keywords, comment_tags, options):
    """Extracts translatable strings from Glade files or GtkBuilder UI XML.

    :param fileobj: the file-like object to extract from, iterable by lines
    :param keywords: a list of translation keywords to extract, with the same
        names and meanings as C/Python i18n function names.
    :param comment_tags: a list of translator tags to search for and
        include in the results. This is ignored.
    :param options: a dictionary of additional options (optional).
        This is not consulted by the extractor.
    :return: An iterator over ``(lineno, funcname, message, comments)``
        tuples whose interpretation depends on ``funcname``.
    :rtype: iterator

    Elements must be marked translatable="yes" (or another truthy value:
    "true", "1", "y", "t", compared case-insensitively). The ``context``
    and ``comments`` attributes are respected. The yielded tuples are
    returned as if you used gettext() or pgettext() in C or Python code:

    * ``funcname == "gettext"``: ``message`` is the element's text.
    * ``funcname == "pgettext"``: ``message`` is ``[context, text]``.

    In other words, "gettext" or "pgettext" must be listed in
    ``keywords`` for contextless or context-bearing strings to be
    translated. The shorthand "_" and "C_" aliases from gi18n.h are
    valid keywords too, and select the same two behaviours.

    ``lineno`` is the 1-based index of the input line that was being fed
    to the parser when the element's end tag was reported.

    Babel defaults to having "gettext" and "pgettext" in ``keywords``,
    so you don't normally need to worry about this.

    See also:

    * babel.messages.extract.extract()
    * http://babel.pocoo.org/en/latest/messages.html#writing-extraction-methods
    * https://www.gnu.org/software/gettext/manual/html_node/PO-Files.html

    """
    parser = etree.XMLPullParser(["end"])
    # Each long name and its shorthand alias enable the same extraction.
    # NOTE(review): the yielded funcname is always the long form; if the
    # caller looks that name up in ``keywords``, listing only the alias
    # may not be enough -- confirm against the Babel version in use.
    pgettext_wanted = ("pgettext" in keywords) or ("C_" in keywords)
    gettext_wanted = ("gettext" in keywords) or ("_" in keywords)
    truthy_values = frozenset(("yes", "true", "1", "y", "t"))
    for line_idx, line_data in enumerate(fileobj):
        # Feed one line at a time so that every element reported by
        # read_events() can be attributed to the current line number.
        parser.feed(line_data)
        for event, elem in parser.read_events():
            assert event == "end"
            translatable_attr = elem.attrib.get("translatable", "no")
            if translatable_attr.casefold() not in truthy_values:
                continue

            comments = []
            if "comments" in elem.attrib:
                comments.append(elem.attrib["comments"])

            # Babel's interpretation of the yielded tuple depends on the
            # function name returned as part of it. This tells Babel what
            # the elements of the returned messages list or tuple mean.
            func_name = None
            if "context" in elem.attrib and pgettext_wanted:
                func_name = "pgettext"
                context = elem.attrib["context"]
                messages = [context, elem.text]
            elif gettext_wanted and "context" not in elem.attrib:
                # Returned strings are equivalent to a list or tuple
                # of length 1, like the arguments to C gettext()/_().
                func_name = "gettext"
                messages = elem.text

            # Nothing requested for this kind of string: skip it.
            if func_name is None:
                continue
            yield (line_idx + 1, func_name, messages, comments)
Example #26
0
 def __init__(self, *, target: BuilderBase = None):
     """Initialise the parent class and create the XML pull parser.

     :param target: keyword-only; forwarded unchanged to the parent
         class's ``__init__``.
     """
     super().__init__(target=target)
     # Request both "start" and "end" events from the pull parser.
     self.parser = ElementTree.XMLPullParser(["start", "end"])