def _parse_result_set(self, result_set):
    """Internal method to parse a ResultSet node.

    Reads the three required integer attributes of the node
    (totalResultsAvailable, totalResultsReturned, firstResultPosition),
    stores them on the instance, emits a debug message through the
    service object, and finally appends the parsed form of every
    'Result' element below the node to self._results.

    Raises parser.XMLError if the node has no attributes or if any of
    the three required attributes is missing. A non-numeric attribute
    value is not translated; the ValueError from int() propagates.
    """
    attributes = result_set.attributes
    if not attributes:
        raise parser.XMLError("ResultSet has no attributes")

    def required_int(name):
        # All three counters are mandatory and handled identically, so
        # the lookup / check / convert sequence lives in one place.
        attr = attributes.getNamedItem(name)
        if not attr:
            raise parser.XMLError("ResultSet has no %s attr" % name)
        return int(attr.nodeValue)

    # Assigned one at a time, in document order, so that a failure on a
    # later attribute leaves the earlier values already stored.
    self._total_results_available = required_int('totalResultsAvailable')
    self._total_results_returned = required_int('totalResultsReturned')
    self._first_result_position = required_int('firstResultPosition')

    self._service._debug_msg("Results = %d / %d / %d",
                             yahoo.search.debug.DEBUG_LEVELS['PARSING'],
                             self._total_results_available,
                             self._total_results_returned,
                             self._first_result_position)

    # Kept as an explicit loop: results parsed before a failing Result
    # node remain in self._results.
    for res in result_set.getElementsByTagName('Result'):
        self._results.append(self._parse_result(res))
def _parse_result(self, result):
    """Internal method to parse one Result node.

    Starts from the dict produced by the parent class and adds three
    keys: 'Distance' (a (text, unit) tuple; mandatory), 'Rating' (a
    dict, or None when the node is absent) and 'Categories' (a parsed
    list, or None when the node is absent).

    Raises parser.XMLError when the Result has no Distance node.
    """
    parsed = super(LocalSearch, self)._parse_result(result)

    # Distance is the only mandatory extra node.
    distances = result.getElementsByTagName('Distance')
    if not distances:
        raise parser.XMLError("LocalSearch DOM object has no Distance")
    distance = distances[0]
    unit = distance.getAttribute('unit')
    unit = "miles" if unit == "" else unit  # an empty unit means miles
    parsed['Distance'] = (self._get_text(distance.childNodes), unit)

    # Each entry is (tag name, default value, cast function), as
    # passed on to self._tags_to_dict.
    rating_tags = (('AverageRating', None, float),
                   ('TotalRatings', None, int),
                   ('TotalReviews', None, int),
                   ('LastReviewDate', 0, int),
                   ('LastReviewIntro', "", None))
    ratings = result.getElementsByTagName('Rating')
    parsed['Rating'] = (self._tags_to_dict(ratings[0], rating_tags)
                        if ratings else None)

    categories = result.getElementsByTagName('Categories')
    parsed['Categories'] = (self._parse_list_node(categories[0], 'Category')
                            if categories else None)
    return parsed
def parse_results(self, dom_object):
    """Specialized DOM parser for LocalSearch, to allow for the Map URL
    in the result.

    Runs the parent class parser first, then stores the text of the
    first 'ResultSetMapUrl' element in self._result_set_map_url.

    Raises parser.XMLError if the map URL cannot be extracted.
    """
    super(LocalSearch, self).parse_results(dom_object)
    try:
        url_node = dom_object.getElementsByTagName('ResultSetMapUrl')
        self._result_set_map_url = self._get_text(url_node[0].childNodes)
    except Exception:
        # Deliberately not a bare "except:": KeyboardInterrupt and
        # SystemExit must pass through instead of being reported as a
        # malformed document.
        raise parser.XMLError("DOM object has no ResultSetMapUrl")
def parse_results(self, dom_object):
    """This is a simple DOM parser for all Yahoo Search services. It
    expects to find a top-level node named ResultSet. This is the main
    entry point for the DOM parser, and it requires a properly
    constructed DOM object (e.g. using minidom).

    The first 'ResultSet' element is handed to self._parse_result_set.

    Raises parser.XMLError if no ResultSet element can be obtained
    from dom_object.
    """
    try:
        result_set = dom_object.getElementsByTagName('ResultSet')[0]
    except Exception:
        # Deliberately not a bare "except:": KeyboardInterrupt and
        # SystemExit must pass through instead of being reported as a
        # malformed document.
        raise parser.XMLError("DOM object has no ResultSet")
    self._parse_result_set(result_set)
def _tags_to_dict(self, node, tags, parse_id=True):
    """This specialized version additionally copies the "id" attribute
    of the node into the result dict, under the key 'Id'.

    The tags are first converted by the parent class implementation.
    When parse_id is false that result is returned unchanged.

    Raises parser.XMLError if parse_id is true and the node has no
    "id" attribute.
    """
    converted = super(_AudioParser, self)._tags_to_dict(node, tags)
    if not parse_id:
        return converted

    id_attr = node.attributes.getNamedItem('id')
    if not id_attr:
        raise parser.XMLError("Result has no id attr")
    converted['Id'] = str(id_attr.nodeValue)
    return converted
def _parse_result(self, result):
    """Internal method to parse one Result node.

    Extends the dict produced by the parent class with a 'Thumbnail'
    entry built from the first Thumbnail element of the Result.

    Raises parser.XMLError when the Result has no Thumbnail node.
    """
    parsed = super(ImageSearch, self)._parse_result(result)

    thumbnails = result.getElementsByTagName('Thumbnail')
    if not thumbnails:
        raise parser.XMLError("ImageSearch DOM object has no Thumbnail")

    # Each entry is (tag name, default value, cast function), as
    # passed on to self._tags_to_dict.
    thumbnail_tags = (('Url', None, None),
                      ('Height', 0, int),
                      ('Width', 0, int))
    parsed['Thumbnail'] = self._tags_to_dict(thumbnails[0], thumbnail_tags)
    return parsed
def _id_attribute_to_dict(self, node):
    """Internal method to extract the text of a node together with its
    "id" attribute. The result dict holds two values:

        { 'Name' : <node-text>,
          'Id' : <id attribute> }

    Raises parser.XMLError if the node has no "id" attribute.
    """
    entry = self._res_dict()
    entry['Name'] = self._get_text(node.childNodes)

    id_attr = node.attributes.getNamedItem('id')
    if not id_attr:
        raise parser.XMLError("%s node has no id attribute" % node.nodeName)
    entry['Id'] = str(id_attr.nodeValue)
    return entry
def _tags_to_dict(self, node, tags):
    """Internal method to parse and extract a list of tags from a
    particular node. We return a dict, which can potentially be empty.

    The tags argument is a sequence of sub-sequences, each of the form

        (tag-name, default value/None, casting function/None)

    The casting function is handed to self._get_text together with the
    child nodes of the first matching element. The default "type" of a
    value is string, so there is no reason to explicitly cast to a str.

    Raises parser.XMLError when a tag is absent from the node and its
    default value is None.
    """
    parsed = self._res_dict()
    for spec in tags:
        tag_name = spec[0]
        matches = node.getElementsByTagName(tag_name)
        if matches:
            # Only the first matching element is used.
            parsed[tag_name] = self._get_text(matches[0].childNodes, spec[2])
            continue

        fallback = spec[1]
        if fallback is None:
            # A default of None marks the tag as mandatory.
            raise parser.XMLError("Result is missing a %s node" % tag_name)
        parsed[tag_name] = fallback
    return parsed