def fulltext_search(self, query, rows=None, start=None):
    """Run an advanced ``fulltext:`` search and return hits.

    The query is prefixed with ``self._prefix_query('fulltext', query)``
    and sent through ``self.raw_search``; the response is parsed as XML.

    Returns a pair ``(total_nbr, identifiers)``:

    * ``total_nbr`` is the integer found at ``info/range_info/total_nbr``
      (0 when that element is missing or empty);
    * ``identifiers`` is a list of UTF-8 encoded identifiers, normalised
      by dropping a leading ``OCA/``, or reducing a ``*.txt`` path to the
      part of its basename before the first ``_``, or dropping a trailing
      ``_ZZ``.

    Raises SolrError when the response is not well-formed XML.
    """
    query = self._prefix_query('fulltext', query)
    result_list = self.raw_search(query, rows=rows, start=start)
    tree = ElementTree()
    try:
        tree.parse(StringIO(result_list))
    except SyntaxError as err:
        # Use a separate name so the parsed tree is never shadowed.
        raise SolrError(err)

    total_node = tree.find('info/range_info/total_nbr')
    total_nbr_text = total_node.text if total_node is not None else None
    # total_nbr_text = tree.find('result').get('numFound')  # for raw xml
    total_nbr = int(total_nbr_text) if total_nbr_text else 0

    out = []
    # Element.getiterator() was removed in Python 3.9; iter() is equivalent.
    for hit in tree.iter('hit'):
        metadata = hit.find('metadata')
        if metadata is None:
            continue
        for child in metadata:
            # Only the first <identifier> at or below each child is used.
            node = next(child.iter('identifier'), None)
            if node is None:
                continue
            xid = six.text_type(node.text).encode('utf-8')
            # xid is a byte string, so compare against byte literals
            # (str arguments raise TypeError on Python 3).
            if xid.startswith(b'OCA/'):
                xid = xid[4:]
            elif xid.endswith(b'.txt'):
                xid = xid.split(b'/')[-1].split(b'_')[0]
            elif xid.endswith(b'_ZZ'):
                xid = xid[:-3]
            out.append(xid)
    return (total_nbr, out)
Example #2
0
    def add_from_file(self, filename):
        """Load a UI definition file and index its widgets and signals.

        The file is first handed to ``Gtk.Builder.add_from_file`` and then
        parsed again as plain XML to fill the bookkeeping attributes:

        * ``self.widgets``: object id -> widget
        * ``self._reverse_widget_dict``: widget -> object id
        * ``self.connections``: ``(object id, signal name, handler)``
          tuples for the ``<signal>`` children of each ``<object>``
        * ``self.glade_handler_dict``: handler name -> ``None`` for every
          ``<signal>`` element in the file
        """
        Gtk.Builder.add_from_file(self, filename)

        # extract data for the extra interfaces
        tree = ElementTree()
        tree.parse(filename)

        # getiterator() was removed in Python 3.9; iter() is the equivalent.
        for ele_widget in tree.iter("object"):
            name = ele_widget.attrib['id']
            widget = self.get_object(name)

            # forward and reverse lookup tables
            self.widgets[name] = widget
            self._reverse_widget_dict[widget] = name

            # signals declared directly on this object
            connections = [(name, ele_signal.attrib['name'],
                            ele_signal.attrib['handler'])
                           for ele_signal in ele_widget.findall("signal")]
            if connections:
                self.connections.extend(connections)

        # every handler mentioned anywhere in the file starts out unbound
        for ele_signal in tree.iter("signal"):
            self.glade_handler_dict.update(
                {ele_signal.attrib["handler"]: None})
Example #3
0
    def add_from_file(self, filename):
        """Parse a builder XML file and record widget/signal details.

        Delegates the actual loading to ``Gtk.Builder.add_from_file`` and
        then walks the XML itself to populate ``self.widgets`` (id ->
        widget), ``self._reverse_widget_dict`` (widget -> id),
        ``self.connections`` (``(id, signal name, handler)`` tuples) and
        ``self.glade_handler_dict`` (handler name -> ``None``).
        """
        Gtk.Builder.add_from_file(self, filename)

        # extract data for the extra interfaces
        tree = ElementTree()
        tree.parse(filename)

        # iter() replaces getiterator(), which no longer exists on
        # Python 3.9 and later.
        for ele_widget in tree.iter("object"):
            name = ele_widget.attrib['id']
            widget = self.get_object(name)

            # populate indexes - a dictionary of widgets
            self.widgets[name] = widget

            # populate a reversed dictionary
            self._reverse_widget_dict[widget] = name

            # populate connections list from this object's own signals
            connections = [
                (name,
                 ele_signal.attrib['name'],
                 ele_signal.attrib['handler'])
                for ele_signal in ele_widget.findall("signal")
            ]
            if connections:
                self.connections.extend(connections)

        # register every handler name found in the file
        for ele_signal in tree.iter("signal"):
            self.glade_handler_dict.update(
                {ele_signal.attrib["handler"]: None})
Example #4
0
class LastParser(object):
    """Reader for a user's "recent tracks" RSS feed.

    The feed is downloaded and parsed once, in the constructor; the
    accessor methods only walk the already-parsed tree.
    """

    # URL template; ``{0}`` is replaced with the user name.
    RSS_URL = "http://ws.audioscrobbler.com/2.0/user/{0}/recenttracks.rss"

    def __init__(self, user):
        """Download and parse the feed for ``user``."""
        self.tree = ElementTree()
        self.tree.parse(urllib2.urlopen(self.RSS_URL.format(user)))

    def get_songs(self, count=10):
        """Return up to ``count`` songs as ``{child tag: child text}`` dicts.

        One dict is built per ``<item>`` element, in document order.
        """
        # getiterator() was removed in Python 3.9; iter() is the equivalent.
        songs = [{child.tag: child.text for child in item}
                 for item in self.tree.iter("item")]
        return songs[:count]

    def get_song(self):
        """Return the first song; raises IndexError if the feed has none."""
        return self.get_songs(1)[0]

    def get_titles(self, count=10):
        """Return the text of up to ``count`` ``<title>`` elements.

        The first ``<title>`` in the document is skipped because it is the
        title of the feed itself rather than of a track.
        """
        titles = [title.text for title in self.tree.iter("title")]
        return titles[1:count + 1]  # removing rss title

    def get_title(self):
        """Return the first track title; raises IndexError if there is none."""
        return self.get_titles(1)[0]
Example #5
0
    def pagetext_search(self, locator, query, rows=None, start=None):
        """Does an advanced search on
               pagetext:blah locator:identifier
        where identifier is one of the id's from fulltext search.
        You get back a list of page numbers like [21, 25, 39].

        The page numbers are the leaf numbers taken from the text of each
        ``<identifier>`` element in the response, in document order.
        Raises SolrError when the response is not well-formed XML.
        """
        def extract(page_id):
            """TODO: DjVu format is deprecated. Is this function
            still even used?
            A page id is something like
            'adventsuburbanit00butlrich_0065.djvu',
            which this function splits into a locator and
            a leaf number ('adventsuburbanit00butlrich', 65).
            An id that does not match that shape raises AttributeError."""
            match = re.search(r'(.*)_(\d{4})\.djvu$', page_id)
            name, leaf = match.group(1, 2)
            return name, int(leaf)

        # try using qf= parameter here and see if it gives a speedup. @@
        query = self._prefix_query('pagetext', query)
        page_hits = self.raw_search(query,
                                    fq='locator:' + locator,
                                    rows=rows,
                                    start=start)
        tree = ElementTree()
        try:
            tree.parse(StringIO(page_hits))
        except SyntaxError as err:
            raise SolrError(err)
        # getiterator() was removed in Python 3.9; iter() is the equivalent.
        return [extract(node.text)[1] for node in tree.iter('identifier')]
Example #6
0
    def fulltext_search(self, query, rows=None, start=None):
        """Does an advanced search on fulltext:blah.
        You get back a pair (x,y) where x is the total # of hits
        and y is a list of identifiers like ["foo", "bar", etc.]

        x is read from ``info/range_info/total_nbr`` and is 0 when that
        element is missing or empty. Each identifier is UTF-8 encoded and
        normalised: a leading ``OCA/`` is dropped, a ``*.txt`` path is
        reduced to the part of its basename before the first ``_``, and a
        trailing ``_ZZ`` is dropped (first matching rule only).

        Raises SolrError when the response is not well-formed XML.
        """
        query = self._prefix_query('fulltext', query)
        result_list = self.raw_search(query, rows=rows, start=start)
        tree = ElementTree()
        try:
            tree.parse(StringIO(result_list))
        except SyntaxError as err:
            # A distinct name keeps the parsed tree from being shadowed.
            raise SolrError(err)

        total_node = tree.find('info/range_info/total_nbr')
        total_nbr_text = total_node.text if total_node is not None else None
        # total_nbr_text = tree.find('result').get('numFound')  # for raw xml
        total_nbr = int(total_nbr_text) if total_nbr_text else 0

        out = []
        # iter() replaces getiterator(), removed in Python 3.9.
        for hit in tree.iter('hit'):
            metadata = hit.find('metadata')
            if metadata is None:
                continue
            for child in metadata:
                # Take at most one identifier per metadata child.
                node = next(child.iter('identifier'), None)
                if node is None:
                    continue
                xid = six.text_type(node.text).encode('utf-8')
                # Byte literals: xid is bytes after encode(), and mixing
                # bytes with str raises TypeError on Python 3.
                if xid.startswith(b'OCA/'):
                    xid = xid[4:]
                elif xid.endswith(b'.txt'):
                    xid = xid.split(b'/')[-1].split(b'_')[0]
                elif xid.endswith(b'_ZZ'):
                    xid = xid[:-3]
                out.append(xid)
        return (total_nbr, out)
    def pagetext_search(self, locator, query, rows=None, start=None):
        """Does an advanced search on
               pagetext:blah locator:identifier
        where identifier is one of the id's from fulltext search.
        You get back a list of page numbers like [21, 25, 39].

        Each page number is parsed out of the text of an ``<identifier>``
        element of the response. Raises SolrError when the response is
        not well-formed XML.
        """

        def extract(page_id):
            """TODO: DjVu format is deprecated. Is this function
            still even used?
            A page id is something like
            'adventsuburbanit00butlrich_0065.djvu',
            which this function splits into a locator and
            a leaf number ('adventsuburbanit00butlrich', 65).
            A page id of any other shape raises AttributeError."""
            # Raw string: '\d' and '\.' are invalid escapes in a plain
            # string literal and trigger warnings on modern Python.
            found = re.search(r'(.*)_(\d{4})\.djvu$', page_id)
            prefix, digits = found.group(1, 2)
            return prefix, int(digits)

        # try using qf= parameter here and see if it gives a speedup. @@
        query = self._prefix_query('pagetext', query)
        page_hits = self.raw_search(query,
                                    fq='locator:' + locator,
                                    rows=rows,
                                    start=start)
        xml_tree = ElementTree()
        try:
            xml_tree.parse(StringIO(page_hits))
        except SyntaxError as err:
            raise SolrError(err)
        # iter() replaces getiterator(), removed in Python 3.9.
        page_ids = [node.text for node in xml_tree.iter('identifier')]
        return [extract(page_id)[1] for page_id in page_ids]
Example #8
0
    def __init__(self, name, file, parent=None):
        """Build the window from a Gtk.Builder file and wrap its widgets.

        name   -- id of the top-level object to fetch from the builder
        file   -- path of the builder XML file
        parent -- optional parent window; when given, this window is made
                  transient for ``parent.item``

        Every ``<object>`` in the file is looked up in the builder. If the
        widget's class name matches the ``type`` attribute of one of
        ``self.classes``, the widget is wrapped by that class and stored in
        ``self.widgets``; otherwise the raw widget goes to
        ``self.other_widgets``.
        NOTE(review): ``self.widgets`` / ``self.other_widgets`` are assumed
        to be created by ``AWindow.__init__`` -- confirm.
        """
        AWindow.__init__(self, name, file, parent)

        self.builder = Gtk.Builder()
        self.builder.add_from_file(self.file)
        self.item = self.builder.get_object(self.name)
        if self.parent is not None:
            self.item.set_transient_for(self.parent.item)

        # wrapper classes keyed by the widget class name they handle
        self.types = {}
        for cls in self.classes:
            if hasattr(cls, 'type'):
                self.types[cls.type] = cls

        tree = ElementTree()
        tree.parse(self.file)
        # getiterator() was removed in Python 3.9; iter() is the equivalent.
        for ele_widget in tree.iter("object"):
            widget_id = ele_widget.attrib['id']
            widget = self.builder.get_object(widget_id)
            # renamed from ``type`` so the builtin is not shadowed
            type_name = widget.__class__.__name__

            if type_name in self.types:
                self.widgets[widget_id] = self.types[type_name](widget)
            else:
                self.other_widgets[widget_id] = widget

        self.item.connect('delete-event', self.emit_closed)
        def normalizeXMLData(data):
            """Parse ``data`` as XML, empty the filtered elements, re-serialize.

            Every element whose tag appears in ``filters`` (a name taken
            from outside this function) is reset with ``Element.clear()``;
            the resulting tree is returned via ``tostring``.
            Raises ValueError when ``data`` cannot be parsed.
            """
            # Read in XML
            try:
                tree = ElementTree(file=StringIO.StringIO(data))
            except Exception:
                raise ValueError("Could not parse XML data")

            # Apply filters. getiterator() was removed in Python 3.9; the
            # matches are snapshotted with list() because clear() mutates
            # the tree that is being walked.
            for tag in filters:
                for node in list(tree.iter(tag)):
                    node.clear()
            return tostring(tree.getroot())
Example #10
0
        def normalizeXMLData(data):
            """Return ``data`` re-serialized with filtered elements emptied.

            ``data`` is parsed as XML; each element whose tag is listed in
            ``filters`` (defined outside this function) has its text,
            attributes and children removed by ``Element.clear()``.
            Raises ValueError when ``data`` is not parseable.
            """
            # Read in XML
            try:
                tree = ElementTree(file=StringIO.StringIO(data))
            except Exception:
                raise ValueError("Could not parse XML data")

            # Apply filters. iter() replaces getiterator() (removed in
            # Python 3.9); list() fixes the set of nodes before any of
            # them is cleared.
            for tag_name in filters:
                matches = list(tree.iter(tag_name))
                for node in matches:
                    node.clear()
            return tostring(tree.getroot())
Example #11
0
    def __init__(self, filename, window):
        """Load a builder file, show ``window`` and index all widgets.

        filename -- path of the Gtk.Builder XML file
        window   -- passed unchanged to ``self.ShowWindow``

        After construction ``self.widgets`` maps the ``id`` of every
        ``<object>`` element in the file to the widget returned by the
        builder for that id.
        """
        self.widgets = {}
        self.builder = Gtk.Builder()
        self.builder.add_from_file(filename)

        self.ShowWindow(window)

        tree = ElementTree()
        tree.parse(filename)

        # getiterator() was removed in Python 3.9; iter() is the equivalent.
        for ele_widget in tree.iter("object"):
            name = ele_widget.attrib['id']
            self.widgets[name] = self.builder.get_object(name)
Example #12
0
    def __init__(self, result_xml):
        """Parse a search response and expose its paging data and hits.

        result_xml -- the response document as text; it is UTF-8 encoded
                      before parsing.

        Sets ``total_results``, ``begin``, ``end`` and
        ``results_this_page`` from the integer children of
        ``info/range_info``, and ``result_list`` to the text of every
        ``<identifier>`` element (converted with ``str``).

        Raises SolrError (carrying the parse error, the raw XML and a
        formatted stack) when the document is not well-formed.
        """
        et = ElementTree()
        try:
            encoded = result_xml.encode('utf-8')
            et.parse(StringIO(encoded))
        except SyntaxError as e:
            ptb = traceback.extract_stack()
            raise SolrError(e, result_xml, traceback.format_list(ptb))
        range_info = et.find('info').find('range_info')

        def gn(tagname):
            # integer value of one child of <range_info>
            return int(range_info.findtext(tagname))
        self.total_results = gn('total_nbr')
        self.begin = gn('begin')
        self.end = gn('end')
        self.results_this_page = gn('contained_in_this_set')

        # getiterator() was removed in Python 3.9; iter() is the equivalent.
        self.result_list = [str(a.text) for a in et.iter('identifier')]
    def __init__(self, result_xml):
        """Build the result object from the XML text of a search response.

        The text is UTF-8 encoded and parsed. The four integers under
        ``info/range_info`` (``total_nbr``, ``begin``, ``end``,
        ``contained_in_this_set``) become ``total_results``, ``begin``,
        ``end`` and ``results_this_page``; ``result_list`` holds
        ``str(text)`` of each ``<identifier>`` element.

        Raises SolrError with the parse error, the original XML and a
        formatted stack trace when parsing fails.
        """
        et = ElementTree()
        try:
            payload = result_xml.encode('utf-8')
            et.parse(StringIO(payload))
        except SyntaxError as e:
            ptb = traceback.extract_stack()
            raise SolrError(e, result_xml, traceback.format_list(ptb))
        range_info = et.find('info').find('range_info')

        def gn(tagname):
            # read one numeric field of <range_info>
            return int(range_info.findtext(tagname))
        self.total_results = gn('total_nbr')
        self.begin = gn('begin')
        self.end = gn('end')
        self.results_this_page = gn('contained_in_this_set')

        # iter() replaces getiterator(), removed in Python 3.9.
        self.result_list = [str(node.text)
                            for node in et.iter('identifier')]
Example #14
0
def install(language,
            directory=config.default_dict_path,
            repos=config.default_repository,
            use_description=True):
    '''
    Download and install a dictionary file.
    language: a string of the form 'll_CC'. Example: 'en_US' for English, USA
    directory: the installation directory. Defaults to the
    value given in config.py. After installation this is the package root of 'hyphen'
    repos: the url of the dictionary repository. (Default: as declared in config.py;
    after installation of PyHyphen this is LibreOffice's GIT repository .).
    use_description: if true, look for a 'dictionaries.xcu' description file
    in the repository and install every hyphenation dictionary it lists;
    otherwise (or if no description file can be fetched) guess the
    dictionary URL from the language.

    Raises IOError if a description file was found but no hyphenation
    dictionary could be installed from it.
    '''

    # Download the dictionaries.xcu file from the LibreOffice repository if needed
    descr_file = None
    language_ext_name = language
    if use_description:
        # First try the full language name; it won't work in all cases,
        # so fall back to its first two characters.
        for candidate in (language, language[:2]):
            language_ext_name = candidate
            try:
                descr_file = urlopen(repos + candidate + '/dictionaries.xcu')
            except URLError:
                descr_file = None
            else:
                break

    # Parse the xml file if it is present, and extract the data.
    if use_description and descr_file:
        descr_tree = ElementTree(file=descr_file)

        # Flag to catch the case that the xcu file
        # does not refer to a hyphenation dict
        found_dict = False

        # getiterator()/getchildren() were removed in Python 3.9;
        # iter() and direct iteration are the equivalents.
        for node in descr_tree.iter('node'):
            # We assume a node relates to a hyphenation dict if one of
            # its attribute values contains the substring 'hyph'.
            if not any('hyph' in value.lower() for _, value in node.items()):
                continue

            # Reset per node so values never leak in from a previous node.
            dict_fn = None
            dict_locales = []
            for prop in node:
                for _, prop_value in prop.items():
                    prop_key = prop_value.lower()
                    if prop_key == 'locations':
                        # Its only child's text is a list of strings of the
                        # form %origin%/<filename>. For simplicity, we only
                        # use the first filename in the list.
                        raw_dict_fn = prop[0].text.split()[0]
                        dict_fn = raw_dict_fn[9:]  # drop the 9-char prefix
                        break  # skip any other values of this property
                    elif prop_key == 'locales':
                        # Its only child's text is a list of locales.
                        dict_locales = prop[0].text.replace('-', '_').split()
                        break  # skip any other values of this property

            if dict_fn is None:
                # No file location given: nothing to download for this node.
                continue
            found_dict = True

            # Install the dictionary file
            dict_url = ''.join((repos, language_ext_name, '/', dict_fn))
            dict_str = urlopen(dict_url).read()
            filepath = directory + '/' + dict_fn
            with open(filepath, 'wb') as dict_file:
                dict_file.write(dict_str)

            # Save the metadata
            # Generate a record for each locale, overwrite any existing ones
            new_dict = hyphen.DictInfo(dict_locales, filepath, url=dict_url)
            for locale in dict_locales:
                hyphen.dict_info[locale] = new_dict

        # Catch the case that there is no hyphenation dict
        # for this language:
        if not found_dict:
            raise IOError('Cannot find hyphenation dictionary for language ' +
                          language + '.')

    # handle the case that there is no xml metadata
    else:
        # Download the dictionary guessing its URL
        dict_fn = ''.join(('hyph_dict_', language, '.dic'))
        dict_url = ''.join((repos, dict_fn))
        dict_str = urlopen(dict_url).read()
        filepath = directory + '/' + dict_fn
        # read() returns bytes, so the file must be opened in binary mode
        with open(filepath, 'wb') as dict_file:
            dict_file.write(dict_str)
        # Store the metadata
        new_dict = hyphen.DictInfo([language],
                                   filepath)  # the URL is thus set to None.
        hyphen.dict_info[language] = new_dict
    # Save the modified metadata
    save_dict_info()
Example #15
0
def install(language, directory = config.default_dict_path,
            repos = config.default_repository, use_description = True):
    '''
    Download and install a dictionary file.
    language: a string of the form 'll_CC'. Example: 'en_US' for English, USA
    directory: the installation directory. Defaults to the
    value given in config.py. After installation this is the package root of 'hyphen'
    repos: the url of the dictionary repository. (Default: as declared in config.py;
    after installation of PyHyphen this is LibreOffice's GIT repository .).
    use_description: when true, the repository's 'dictionaries.xcu' file is
    used to locate the dictionaries; when false, or when that file cannot
    be downloaded, the dictionary URL is guessed from the language.

    Raises IOError when the description file yields no installable
    hyphenation dictionary.
    '''

    # Download the dictionaries.xcu file from the LibreOffice repository if needed
    descr_file = None
    language_ext_name = language
    if use_description:
        # first try the full language name; it won't work in all cases...
        try:
            descr_file = urlopen(repos + language_ext_name + '/dictionaries.xcu')
        except URLError:
            # ...so retry with only the first two characters.
            language_ext_name = language[:2]
            try:
                descr_file = urlopen(repos + language_ext_name + '/dictionaries.xcu')
            except URLError:
                descr_file = None

    # Parse the xml file if it is present, and extract the data.
    if use_description and descr_file:
        descr_tree = ElementTree(file = descr_file)

        # Flag to catch the case that the xcu file
        # does not refer to a hyphenation dict
        found_dict = False

        # Iterate over all <node> elements. iter() and direct iteration
        # replace getiterator()/getchildren(), removed in Python 3.9.
        for node in descr_tree.iter('node'):
            # A node is taken to describe a hyphenation dict when one of
            # its attribute values contains the substring 'hyph'.
            attr_values = [value for _, value in node.items()]
            if not any('hyph' in value.lower() for value in attr_values):
                continue

            # Start from a clean slate so nothing is reused from an
            # earlier node.
            dict_fn = None
            dict_locales = []
            for prop in node:
                for prop_value in [value for _, value in prop.items()]:
                    lowered = prop_value.lower()
                    if lowered == 'locations':
                        # Its only child's text is a list of strings of the
                        # form %origin%/<filename>; only the first filename
                        # is used.
                        raw_dict_fn = prop[0].text.split()[0]
                        dict_fn = raw_dict_fn[9:] # drop the 9-char prefix
                        break # skip any other values of this property
                    elif lowered == 'locales':
                        # Its only child's text is a list of locales.
                        dict_locales = prop[0].text.replace('-', '_').split()
                        break # skip any other values of this property

            if dict_fn is None:
                # Without a location there is no file to fetch.
                continue
            found_dict = True

            # Install the dictionary file
            dict_url = ''.join((repos, language_ext_name, '/', dict_fn))
            dict_str = urlopen(dict_url).read()
            filepath = directory + '/' + dict_fn
            with open(filepath, 'wb') as dict_file:
                dict_file.write(dict_str)

            # Save the metadata
            # Generate a record for each locale, overwrite any existing ones
            new_dict = hyphen.DictInfo(dict_locales, filepath, url = dict_url)
            for locale in dict_locales:
                hyphen.dict_info[locale] = new_dict

        # Catch the case that there is no hyphenation dict
        # for this language:
        if not found_dict:
            raise IOError('Cannot find hyphenation dictionary for language ' + language + '.')

    # handle the case that there is no xml metadata
    else:
        # Download the dictionary guessing its URL
        dict_fn = ''.join(('hyph_dict_', language, '.dic'))
        dict_url = ''.join((repos, dict_fn))
        dict_str = urlopen(dict_url).read()
        filepath = directory + '/' + dict_fn
        # binary mode: the downloaded payload is bytes
        with open(filepath, 'wb') as dict_file:
            dict_file.write(dict_str)
        # Store the metadata
        new_dict = hyphen.DictInfo([language], filepath) # the URL is thus set to None.
        hyphen.dict_info[language] = new_dict
    # Save the modified metadata
    save_dict_info()
import sys
from collections import namedtuple
# xml.etree.cElementTree was removed in Python 3.9; the plain module exports
# the same names.
from xml.etree.ElementTree import ElementTree
from xml.etree.ElementTree import ParseError


# Script: read 'results.xml' and print one Contact record per <contact>
# element. Exits with status 42 when the file is not well-formed XML.
Contact = namedtuple('ContactRecord', 'first last age email')
try:
    # ElementTree.parse() returns the root element, so ``tree`` is an Element.
    tree = ElementTree().parse('results.xml')
except ParseError as e:
    print('Parse error: {err}'.format(err=e))
    sys.exit(42)

contacts = []

# Element.getiterator() was removed in Python 3.9; iter() is the equivalent.
for contact in tree.iter('contact'):
    try:
        first = contact.find('.//first').text
        last = contact.find('.//last').text
        age = contact.find('./name').get('age')
        email = contact.find('.//email').text
        contacts.append(Contact(first, last, age, email))
    except AttributeError as e:
        # find() returned None: a required child is missing, so this
        # contact is reported and skipped.
        print('Element error: {err}'.format(err=e))

print(contacts)