Beispiel #1
0
 def test_DictCharWidget_renders_fieldset_with_label_and_field_names(self):
     """The rendered fieldset carries the prefixed names, labels and values.

     The widget output is wrapped in a synthetic ``<root>`` element so it
     can be parsed and then queried with XPath.
     """
     field_names = [factory.make_string(), factory.make_string()]
     field_labels = [factory.make_string(), factory.make_string()]
     field_values = [factory.make_string(), factory.make_string()]
     widget = DictCharWidget(
         [widgets.TextInput, widgets.TextInput, widgets.CheckboxInput],
         field_names,
         [],  # no initial values
         field_labels,
         skip_check=True,
     )
     prefix = factory.make_string()
     markup = "<root>" + widget.render(prefix, field_values) + "</root>"
     document = fromstring(markup)

     def query(expression):
         # Evaluate one XPath expression against the parsed widget markup.
         return XPath(expression)(document)

     rendered = [
         query("fieldset/input/@name"),
         query("fieldset/label/text()"),
         query("fieldset/input/@value"),
     ]
     expected = [
         ["%s_%s" % (prefix, field_name) for field_name in field_names],
         field_labels,
         field_values,
     ]
     self.assertEqual(expected, rendered)
def download_frm_scirp(doc_list, timeout=30):
    """Re-download faulty SCIRP documents and parse the fresh XML.

    For every ``(file_name, doc_id)`` pair the XML is fetched from
    ``https://www.scirp.org/xml/<doc_id>.xml``. The ``<sup>`` and ``<b>``
    tags are removed from the payload, the result is handed to
    ``get_data_from_xml`` and the cleaned XML is written to ``file_name``.

    Processing is best effort: a failure for one document is logged and the
    remaining documents are still processed.

    Args:
        doc_list: Sized iterable of ``(file_name, doc_id)`` pairs.
        timeout: Seconds to wait for the server before giving up on one
            document, so that a stalled connection cannot block the run.

    Returns:
        list: One ``get_data_from_xml`` result per successfully parsed
        document, in input order.
    """
    title_query = XPath('.//article-title')
    abstract_query = XPath('.//abstract/p')
    body_query = XPath('.//body/sec')
    strip_elements_list = ['xref', 'title', 'b', 'sup', 'table-wrap']
    logger.info("Will retry processing for {} scirp faulty files".format(
        len(doc_list)))
    result_dict_list = []
    for file_name, doc_id in doc_list:
        logger.debug('Processing : {}'.format(file_name))
        urlfile = "https://www.scirp.org/xml/{}.xml".format(str(doc_id))
        try:
            response = requests.get(urlfile, timeout=timeout)
            if response.status_code != 200:
                logger.info(
                    "No substitute XML exists for {}".format(file_name))
                continue
            # Strip the tags directly on the bytes: the pattern is pure
            # ASCII, so no decode/encode round trip is needed and payloads
            # that are not valid UTF-8 no longer fail here.
            xml_text = re.sub(b'<sup>|</sup>|<b>|</b>', b'',
                              response.content)
            result_dict = get_data_from_xml(
                xml_text=xml_text,
                title_query=title_query,
                abstract_query=abstract_query,
                body_query=body_query,
                # Keyword spelled as the callee expects it (sic).
                strip_elemnts_list=strip_elements_list,
                strip_tags_list=[],
                namespaces=None)
            result_dict_list.append(result_dict)
            with open(file_name, 'wb') as xml_file:
                xml_file.write(xml_text)
        except Exception as e:
            # Deliberately broad (best effort), but report the real cause
            # instead of claiming that no substitute exists.
            logger.warning(
                "Could not process substitute XML for {}: {!r}".format(
                    file_name, e))
    return result_dict_list
def area_codes():
    """Run the structure search on the home page and list the result links.

    The home page is fetched first because its form data has to be posted
    back together with the search criteria.

    Returns:
        list: One dict per result link with the keys ``href`` (the part of
        the link target after the last ``../``) and ``title`` (the first
        text node of the link).
    """
    # The GET is needed to obtain the form data that accompanies the POST.
    landing_page = pm.get_page(home_url)
    form = pm.get_formDataCuric(landing_page)

    search_fields = {
        'ctl00$ctl00$countdowntimer2$hdnCountdowntime': '0',
        'ctl00$ctl00$ddlAcademicYear': searchYear,
        'ctl00$ctl00$cphContent$ContentPlaceHolder1$ddlStructureLevel': '2',
        'ctl00$ctl00$cphContent$ContentPlaceHolder1$txtStructureCode': '',
        'ctl00$ctl00$cphContent$ContentPlaceHolder1$txtStructureTitle': '',
        'ctl00$ctl00$cphContent$ContentPlaceHolder1$btnSearch': 'Search',
        'hiddenInputToUpdateATBuffer_CommonToolkitScripts': '1',
    }
    form.update(search_fields)

    # TODO: the search result is expected to change only occasionally;
    # check for a stored copy before sending the request.
    results_page = pm.post_page(home_url, form)

    find_links = XPath('//*[@id="ctl00_ctl00_cphContent_ContentPlaceHolder1_gvStructureSearch"]//tr/td/a')
    find_href = XPath('@href')
    find_title = XPath('text()')
    return [
        {
            "href": find_href(link)[0].split("../")[-1],
            "title": find_title(link)[0],
        }
        for link in find_links(results_page)
    ]
Beispiel #4
0
class AccountLifeInsurance(IsinMixin, Page):
    """Life-insurance account page listing the account's investments."""

    _re_isin = re.compile(r'isin=(\w+)')
    _tr_list = XPath(
        '//div[@id="content-gauche"]//table[@class="list"]/tbody/tr')
    _td_list = XPath('./td')
    _link = XPath('./td[1]/a/@href')

    def get_investment(self):
        """Yield one ``Investment`` per row of the investment table.

        Raises:
            IndexError: if a row has no link in its first cell or fewer
                cells than expected.
        """
        # Boursorama table cells
        # ----------------------
        # 0. Fonds
        # 1. Date de valeur
        # 2. Valeur de part
        # 3. Nombre de parts
        # 4. Contre valeur
        # 5. Prix revient
        # 6. +/- value en €*
        # 7. +/- value en %*
        #
        # Investment model
        # ----------------
        # label =       StringField('Label of stocks')
        # code =        StringField('Identifier of the stock (ISIN code)')
        # description = StringField('Short description of the stock')
        # quantity =    IntField('Quantity of stocks')
        # unitprice =   DecimalField('Buy price of one stock')
        # unitvalue =   DecimalField('Current value of one stock')
        # valuation =   DecimalField('Total current valuation of the Investment')
        # diff =        DecimalField('Difference between the buy cost and the current valuation')
        for tr in self._tr_list(self.document):
            cells = [el_to_string(td) for td in self._td_list(tr)]
            link = unicode(self._link(tr)[0])

            inv = Investment()
            isin = self.get_isin(link)

            # id/code are only set when an ISIN could be read from the link.
            if isin:
                inv.id = inv.code = isin
            inv.label = cells[0]
            inv.quantity = Decimal(cells[3])
            inv.valuation = Decimal(cells[4])
            inv.unitprice = Decimal(cells[5])
            inv.unitvalue = Decimal(cells[2])
            inv.diff = Decimal(cells[6])

            # Only links to a '/cours.phtml' page are kept as detail URL.
            inv._detail_url = link if '/cours.phtml' in link else None

            yield inv

    def get_valuation_diff(self, account):
        """Set ``account.valuation_diff`` from the page total.

        The value is read from the element that directly follows the cell
        labelled "Total des +/- values **". The XPath result is a list of
        elements, so the first match is converted with ``el_to_string``
        (as ``get_investment`` does for table cells) before it is handed
        to ``Decimal``; passing the list itself can never succeed.

        Raises:
            ValueError: if the page contains no such cell.
        """
        matches = self.document.xpath(
            '//td[contains(text(), "Total des +/- values **")]/following-sibling::*[1]'
        )
        if not matches:
            raise ValueError('valuation difference total not found on page')
        account.valuation_diff = Decimal(el_to_string(matches[0]))
Beispiel #5
0
 def generate(self, nsmap):
     """Create the column/relationship attributes for this property.

     Nothing is generated when the property is not used. Otherwise the
     handling depends on ``self.range``:

     * a ``CIMEnum`` range gets a ``<var>_name`` foreign-key column to
       ``CIMEnumValue.name`` plus a relationship to ``CIMEnumValue``;
     * any other truthy range is delegated to ``generate_relationship``;
     * without a range a plain column is created whose type is chosen from
       ``self.mapped_datatype``.

     The generated attributes are set on ``self.cls.class_``; ``self.key``
     and ``self.xpath`` are updated for the enum and plain-column cases.

     Args:
         nsmap: Namespace mapping passed to the compiled XPath queries.
     """
     datatype = self.mapped_datatype
     if not self.used:
         return

     new_attrs = OrderedDict()
     if isinstance(self.range, CIMEnum):
         var, query_base = self.name_query()
         fk_name = f"{var}_name"
         fk_column = Column(String(120), ForeignKey(CIMEnumValue.name), name=fk_name)
         new_attrs[fk_name] = fk_column
         new_attrs[var] = relationship(CIMEnumValue, foreign_keys=fk_column)
         self.key = fk_name
         self.xpath = XPath(query_base + "/@rdf:resource", namespaces=nsmap)
     elif self.range:
         self.generate_relationship(nsmap)
     else:
         var, query_base = self.name_query()
         log.debug(f"Generating property for {var} on {self.name}")
         self.key = var
         self.xpath = XPath(query_base + "/text()", namespaces=nsmap)
         if not datatype or datatype == "String":
             # A missing datatype falls back to String(50) as well.
             column_type = String(50)
         elif datatype in ("Float", "Decimal"):
             column_type = Float
         elif datatype == "Integer":
             column_type = Integer
         elif datatype == "Boolean":
             column_type = Boolean
         else:
             # Unrecognised datatypes are stored as short strings.
             column_type = String(30)
         new_attrs[var] = Column(column_type, name=f"{var}")

     target_class = self.cls.class_
     for attr_name, attr_value in new_attrs.items():
         setattr(target_class, attr_name, attr_value)
Beispiel #6
0
 def __init__(self):
     """Compile the XPath queries used against the catalog document.

     ``xpath_has_other_versions`` selects catalog children whose
     ``@version`` differs from ``$version`` and whose ``c:filename`` equals
     ``$filename_wo_version``, a dash and the version with ``#`` replaced
     by ``_``. ``xpath_get_version`` selects every ``@version`` attribute
     of the catalog children.
     """
     namespaces = {'c': 'https://toolkit.cit-ec.uni-bielefeld.de/CITKat'}
     # TODO: needs better escaping of all special uri-chars
     other_versions_query = (
         "/c:catalog/child::node()[not(@version = $version) and "
         "c:filename = concat($filename_wo_version, '-', translate(@version, '#', '_'))]"
     )
     version_query = '/c:catalog/child::node()/@version'
     self.xpath_has_other_versions = XPath(other_versions_query, namespaces=namespaces)
     self.xpath_get_version = XPath(version_query, namespaces=namespaces)
Beispiel #7
0
def students_more_details(pmstudentid):
    """Read next-of-kin data, notes and ULN from the further-details page.

    Args:
        pmstudentid: Student id; it is concatenated to the page URL as-is.

    Returns:
        dict: The keys ``'Nok Mobile'``, ``'Nok'``, ``'Notes'``,
        ``'Nok Email'`` and ``'ULN'``. Every value is the XPath result
        passed through ``check_list``; ``'Nok'`` and ``'Notes'`` also have
        leading/trailing ``\\r`` and ``\\n`` characters removed.
    """
    page = pm.get_page('/ILP/Information/furtherdetails.aspx?pmstudentid=' + pmstudentid)

    def lookup(expression):
        # Evaluate one XPath expression on the page and post-process it.
        return check_list(XPath(expression)(page))

    nok_mobile = lookup(
        '//*[@id="Content_Content_txtNextOfKinMobileValue"]/text()')
    nok = lookup(
        '//*[@id="Content_Content_txtNextOfKin_Spellchecker_cccTextArea"]/text()'
    ).strip('\r\n')
    notes = lookup(
        '//*[@id="Content_Content_txtNotes_Spellchecker_cccTextArea"]/text()'
    ).strip('\r\n')
    nok_email = lookup('//*[@id="Content_Content_txtNextOfKinEmail"]/@value')
    uln = lookup('//*[@id="Content_Content_txtULN"]/text()')

    return {
        'Nok Mobile': nok_mobile,
        'Nok': nok,
        'Notes': notes,
        'Nok Email': nok_email,
        'ULN': uln,
    }
Beispiel #8
0
    def test_class_attributes(self):
        """The shelter's XPath attributes compile and its start URL is usable.

        ``animal_url`` and ``next_url`` must be syntactically valid XPath;
        ``start_url`` must use http or https and have a network location.
        """
        for attribute in ('animal_url', 'next_url'):
            try:
                XPath(getattr(self.shelter, attribute))
            except XPathSyntaxError as error:
                self.fail(error.msg)

        parsed = urlparse(self.shelter.start_url)
        self.assertIn(parsed.scheme, ('http', 'https'))
        self.assertTrue(parsed.netloc)
Beispiel #9
0
 def _generateXPathMap(cls):
     """Add the ``category`` and ``stereotype`` queries to ``cls.XPathMap``.

     The parent implementation runs first; its entries are kept and
     overridden by the ones defined here when the keys collide.
     """
     super()._generateXPathMap()
     own_queries = {
         "category": XPath(r"cims:belongsToCategory/@rdf:resource", namespaces=cls.nsmap),
         "stereotype": XPath(r"cims:stereotype/text()", namespaces=cls.nsmap),
     }
     cls.XPathMap = {**cls.XPathMap, **own_queries} if cls.XPathMap else own_queries
Beispiel #10
0
 def _generateXPathMap(cls):
     """Add the ``domain`` and ``multiplicity`` queries to ``cls.XPathMap``.

     The parent implementation runs first; its entries are kept and
     overridden by the ones defined here when the keys collide.
     """
     super()._generateXPathMap()
     own_queries = {
         "domain": XPath(r"rdfs:domain/@rdf:resource", namespaces=cls.nsmap),
         "multiplicity": XPath(r"cims:multiplicity/@rdf:resource", namespaces=cls.nsmap),
     }
     if cls.XPathMap:
         own_queries = {**cls.XPathMap, **own_queries}
     cls.XPathMap = own_queries
Beispiel #11
0
class InvestmentDetail(IsinMixin, Page):
    """Detail page of a single investment."""

    # Raw string: '\w' is not a valid escape sequence in a plain literal.
    _re_isin = re.compile(r'(\w+)')
    # <h2> elements whose class list contains the token "fv-isin".
    _isin = XPath('//h2[@class and contains(concat(" ", normalize-space(@class), " "), " fv-isin ")]')
    _description = XPath('//p[@class="taj"] | //div[@class="taj"]')

    def get_investment_detail(self, inv):
        """Fill ``inv`` with the ISIN and description found on the page.

        ``inv.id`` and ``inv.code`` are set to the result of ``get_isin``
        applied to the text of the first ISIN heading; ``inv.description``
        is the stripped text of the first description element.

        Args:
            inv: Investment object to update in place.

        Raises:
            IndexError: if the page has no ISIN heading or no description
                element.
        """
        subtitle = el_to_string(self._isin(self.document)[0])

        inv.id = inv.code = self.get_isin(subtitle)
        inv.description = el_to_string(self._description(self.document)[0]).strip()
def determine_sentence_type(xml_tree):
    """Classify ``xml_tree`` by the first XPath spec that yields a match.

    The specs are checked in a fixed order (refused, upheld, without cause,
    predictive text); each one is compiled and evaluated only when all
    earlier specs found nothing.

    Returns:
        The ``SentenceType`` member of the first matching spec, or
        ``SentenceType.irrelevant`` when none matches.
    """
    ordered_checks = (
        (refused_xpath_spec, SentenceType.refused),
        (upheld_xpath_spec, SentenceType.upheld),
        (without_cause_xpath_spec, SentenceType.without_cause),
        (predictive_text_xpath_spec, SentenceType.predictive_text),
    )
    for spec, sentence_type in ordered_checks:
        if len(XPath(spec)(xml_tree)) > 0:
            return sentence_type
    return SentenceType.irrelevant
Beispiel #13
0
 def check_recommended(self, ds):
     """Return the recommended checks as ``(name, compiled XPath)`` pairs.

     Every query is compiled with ``self.ns`` as namespace mapping. ``ds``
     is not used by this method.
     """
     named_queries = (
         ('sensor_descriptions',
          "/sml:SensorML/sml:member/sml:System/sml:components/sml:ComponentList/sml:component/sml:System/gml:description"),
         ('sensor_ids',
          "/sml:SensorML/sml:member/sml:System/sml:components/sml:ComponentList/sml:component/sml:System/@gml:id"),
         ('sensor_names',
          "/sml:SensorML/sml:member/sml:System/sml:components/sml:ComponentList/sml:component/@name"),
         ('data_format_template_version',
          "/sml:SensorML/sml:capabilities/swe:SimpleDataRecord/swe:field[@name='ioosTemplateVersion']/swe:Text/swe:value"),
         ('variable_names',
          "/sml:SensorML/sml:member/sml:System/sml:components/sml:ComponentList/sml:component/sml:System/sml:outputs/sml:OutputList/sml:output/swe:Quantity/@definition"),
         ('variable_units',
          "/sml:SensorML/sml:member/sml:System/sml:components/sml:ComponentList/sml:component/sml:System/sml:outputs/sml:OutputList/sml:output/swe:Quantity/swe:uom/@code"),
         ('network_id',
          "/sml:SensorML/sml:member/sml:System/sml:capabilities[@name='networkProcedures']/swe:SimpleDataRecord/gml:metaDataProperty/@xlink:href"),
         ('operator_sector',
          "/sml:SensorML/sml:member/sml:System/sml:classification/sml:ClassifierList/sml:classifier[@name='operatorSector']/sml:Term/sml:value"),
     )
     return [
         (check_name, XPath(expression, namespaces=self.ns))
         for check_name, expression in named_queries
     ]
def student_info(pmstudentid):
    """Read the contact details of a student from the details page.

    Args:
        pmstudentid: Student id; it is concatenated to the page URL as-is.

    Returns:
        dict: The keys ``dob``, ``address``, ``telephone``, ``mobile`` and
        ``email``. All values except ``address`` are passed through
        ``check_list``; ``address`` is the raw list of text nodes.
    """
    page = pm.get_page("/ilp/information/details.aspx?pmstudentid=" + pmstudentid)

    # All controls share this id prefix; only the control name differs.
    id_prefix = '//*[@id="ctl00_ctl00_cphContent_ContentPlaceHolder1_'

    def text_nodes(control_name):
        return XPath(id_prefix + control_name + '"]/text()')(page)

    dob = check_list(text_nodes('txtDOB'))
    address = text_nodes('txtAddress')  # returned without check_list
    telephone = check_list(text_nodes('txtTelephone'))
    mobile = check_list(text_nodes('txtMobile'))
    email = check_list(text_nodes('lnkEmail'))

    return {
        'dob': dob,
        'address': address,
        'telephone': telephone,
        'mobile': mobile,
        'email': email,
    }
def course_codes(url):
    """List the student groups shown on the page at ``'/' + url``.

    Only table rows that have a text node in their third cell different
    from ``"0"`` are considered.

    Returns:
        list: One dict per row with the keys ``href`` (the part of the
        first-cell link after the last ``../``), ``students`` (first text
        node of the third cell) and ``title`` (first text node of the
        first cell).
    """
    page = pm.get_page('/' + url)

    find_rows = XPath('//*[@id="ctl00_ctl00_cphContent_ContentPlaceHolder1_gvStudentGroups"]//tr[td[3]//text()!="0"]')
    find_href = XPath('td[1]/a/@href')
    find_title = XPath('td[1]//text()')
    find_students = XPath('td[3]//text()')

    return [
        {
            "href": find_href(row)[0].split("../")[-1],
            "students": find_students(row)[0],
            "title": find_title(row)[0],
        }
        for row in find_rows(page)
    ]
Beispiel #16
0
 def _generateXPathMap(cls):
     """Add the ``isFixed`` query to ``cls.XPathMap``.

     The parent implementation runs first; its entries are kept and
     overridden by the one defined here when the key collides.
     """
     super()._generateXPathMap()
     own_queries = {
         "isFixed": XPath(r"cims:isFixed/@rdfs:Literal", namespaces=cls.nsmap),
     }
     inherited = cls.XPathMap
     cls.XPathMap = {**inherited, **own_queries} if inherited else own_queries
Beispiel #17
0
 def generate_relationship(self, nsmap=None):
     """Create the relationship attributes for this property on its class.

     For a many-remote property the relationship goes through an
     association table; otherwise a ``<var>_id`` foreign-key column to
     ``<range label>.id`` is created and used by the relationship, and
     ``self.key`` is set to that column name. A backref is added whenever
     the property has an inverse. ``self.xpath`` is always set to the
     ``@rdf:resource`` query of the property.

     Args:
         nsmap: Namespace mapping passed to the compiled XPath query.

     Returns:
         dict: An empty dict; nothing is ever added to it here.
     """
     var, query_base = self.name_query()
     log.debug(f"Generating relationship for {var} on {self.name}")

     def backref_name():
         # Outside the "cim" namespace the backref is namespace-prefixed.
         if self.namespace == "cim":
             return self.inverse.label
         return self.namespace + "_" + self.inverse.label

     new_attrs = {}
     if self.many_remote:
         if self.inverse:
             extra = {"backref": backref_name()}
             association = self.generate_association_table()
             # The table is only remembered when an inverse exists.
             self.association_table = association
         else:
             extra = {}
             association = self.generate_association_table()
         new_attrs[var] = relationship(self.range.label,
                                       secondary=association,
                                       **extra)
     else:
         fk_name = f"{var}_id"
         fk_column = Column(String(50),
                            ForeignKey(f"{self.range.label}.id"),
                            name=fk_name)
         new_attrs[fk_name] = fk_column
         extra = {"foreign_keys": fk_column}
         if self.inverse:
             extra["backref"] = backref_name()
         new_attrs[var] = relationship(self.range.label, **extra)
         self.key = fk_name

     self.xpath = XPath(query_base + "/@rdf:resource", namespaces=nsmap)
     target_class = self.cls.class_
     for attr_name, attr_value in new_attrs.items():
         setattr(target_class, attr_name, attr_value)
     return {}
Beispiel #18
0
 def opt_filter(self, my_filter):
     """Validate ``my_filter`` as XPath and add it to the ``filters`` option.

     Raises:
         usage.UsageError: if ``my_filter`` is not a valid XPath expression.
     """
     try:
         # Compiled only to weed out invalid filters; the string is stored.
         XPath(my_filter)
     except XPathSyntaxError:
         raise usage.UsageError("Invalid XPath expression: %s" % my_filter)
     else:
         self['filters'].append(my_filter)
Beispiel #19
0
    def parse_scan_results(self, data):
        """Count the ``<span>`` elements whose text contains ``FAILED``.

        Args:
            data: HTML markup of the scan result, parsed with ``lxml.html``.

        Returns:
            int: Number of matching ``<span>`` elements.
        """
        document = lxml.html.fromstring(data)
        failed_spans = XPath("//span[contains(text(), 'FAILED')]")(document)
        return len(failed_spans)
Beispiel #20
0
 def _generateXPathMap(cls):
     """Add the ``type`` query to ``cls.XPathMap``.

     The parent implementation runs first; its entries are kept and
     overridden by the one defined here when the key collides.
     """
     super()._generateXPathMap()
     own_queries = {
         "type": XPath(r"rdf:type/@rdf:resource", namespaces=cls.nsmap),
     }
     if cls.XPathMap:
         cls.XPathMap = {**cls.XPathMap, **own_queries}
     else:
         cls.XPathMap = own_queries
Beispiel #21
0
    def valid_xpath(self, to_validate=''):
        """
        Report whether a string is a syntactically valid XPath expression.

        Notes:
            When "to_validate" is empty, the calling object's own locator
            is checked instead.

        Args:
            to_validate (str): Expression to check when the calling
            object's own locator should not be used.

        Returns:
            bool: True if the expression compiles and has a non-empty
            path, False if it has a syntax error.

        """
        expression = to_validate if to_validate else self.locator
        try:
            return bool(XPath(expression).path)
        except XPathSyntaxError:
            return False
Beispiel #22
0
    def select(self, xpath=None, pyquery=None):
        """Run either an XPath or a pyquery query against this node.

        Exactly one of ``xpath`` and ``pyquery`` must be given. Compiled
        XPath objects are kept in ``XPATH_CACHE`` keyed by expression. The
        elapsed time is added to ``GLOBAL_STATE['selector_time']`` and is
        logged when ``DEBUG_LOGGING`` is on.

        Returns:
            The query result wrapped by ``self.wrap_list``.

        Raises:
            Exception: if both options or neither option is given.
        """
        started = time.time()

        if xpath is None and pyquery is None:
            raise Exception('Both xpath and pyquery option are None')
        if xpath is not None and pyquery is not None:
            raise Exception('Both xpath and pyquery option are not None')

        if xpath is None:
            query_exp = pyquery
            val = self.wrap_list(self.pyquery_node().find(pyquery), 'pyquery', pyquery)
        else:
            query_exp = xpath
            if xpath not in XPATH_CACHE:
                XPATH_CACHE[xpath] = XPath(xpath)
            compiled = XPATH_CACHE[xpath]
            val = self.wrap_list(compiled(self.node), 'xpath', xpath)

        elapsed = time.time() - started
        if DEBUG_LOGGING:
            logger.debug(u'Performed query [%s], elements: %d, time: %.05f sec' % (query_exp, len(val), elapsed))
        GLOBAL_STATE['selector_time'] += elapsed

        return val
Beispiel #23
0
def extract_uitspraak(ruling_tree):
    """Extract the text of the ruling ("uitspraak") part of a judgment.

    The XPath expressions are tried in order of precedence and the text of
    the first one that yields any content is used. Previously all three
    expressions were evaluated but only the first result was ever looked
    at, so the fallbacks had no effect and the length checks on the result
    tuple could never take another branch.

    Args:
        ruling_tree: Parsed judgment document to query.

    Returns:
        The result of ``filter_out_wijzers(clean(text))``, where ``text``
        is the newline-joined match of the first expression that produced
        content, or the empty string when none did.
    """
    from lxml.etree import XPath

    # Ordered by precedence: earlier expressions win.
    xpath_strs = [
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:*/text()[contains(., 'DE UITSPRAAK')]/"
        "ancestor::rvr:section/descendant-or-self::text() | "
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:*/text()[contains(., 'DE UITSPRAAK')]/"
        "ancestor::rvr:section/following-sibling::rvr:section/descendant-or-self::text()",
        '/open-rechtspraak/rvr:uitspraak/rvr:section[@role="beslissing"][last()]/descendant-or-self::text()',
        "/open-rechtspraak/rvr:uitspraak/rvr:section/rvr:title/text()[contains(., 'eslissing')][last()]//"
        "ancestor::rvr:section/descendant-or-self::text()"
    ]

    text = ''
    for xpath_str in xpath_strs:
        finder = XPath(xpath_str, namespaces=NAMESPACE_PREFIX_MAP)
        text = '\n'.join(finder(ruling_tree))
        if text:
            # First expression with content wins; skip the fallbacks.
            break
    return filter_out_wijzers(clean(text))
Beispiel #24
0
def extract_standpunt_adv(ruling_tree):
    """Extract the text that follows the "standpunt van de verdediging" heading.

    Each XPath expression is evaluated and only non-empty results are
    kept. Previously empty results were kept too, so the result tuple
    always had two entries and the function could only ever hit
    ``assert False`` (or return None when run with ``-O``).

    Args:
        ruling_tree: Parsed judgment document to query.

    Returns:
        The cleaned text when exactly one expression produced content, or
        None when no expression matched.

    Raises:
        AssertionError: if more than one expression produced content, as
            the original length check intended. It is raised explicitly so
            that it also fires when assertions are disabled.
    """
    from lxml.etree import XPath

    xpath_strs = [
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:parablock/rvr:para/"
        "descendant-or-self::*[contains(text(), 'standpunt van de verdediging')]/"
        "ancestor::rvr:para/following-sibling::rvr:para/text()",
        "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:paragroup/"
        "descendant-or-self::*[contains(text(), 'standpunt van de Verdediging') "  # TODO: unneeded
        "or contains(text(), 'standpunt van de verdediging')"
        "or contains(text(), 'standpunt van verdediging')]/"  # TODO: unneeded
        "parent::*/descendant::rvr:parablock/descendant::*/text()"
    ]
    # Disabled candidate, kept for reference:
    # "/open-rechtspraak/rvr:uitspraak/rvr:section//rvr:para/"
    # "rvr:emphasis[text()='Het standpunt van de verdediging']/"
    # "ancestor::rvr:para/following-sibling::rvr:*/text()"

    candidates = (
        '\n'.join(XPath(xpath_str, namespaces=NAMESPACE_PREFIX_MAP)(ruling_tree))
        for xpath_str in xpath_strs)
    # Drop expressions that matched nothing so the count is meaningful.
    items = tuple(text for text in candidates if text)

    if not items:
        return None
    if len(items) > 1:
        raise AssertionError(
            'more than one XPath expression matched the defence standpoint')
    return clean(items[0])
def xpath_results(node, xpath_expr):
    """Evaluate ``xpath_expr`` against ``node``.

    The expression is compiled with ``REGEXPNAMESPACE`` as namespace
    mapping.

    Args:
        node: Node the compiled expression is applied to.
        xpath_expr: XPath expression; when it is empty or None nothing is
            evaluated.

    Returns:
        The XPath result, or None when ``xpath_expr`` is falsy.

    Raises:
        Exception: wrapping the tuple ``(original_error, xpath_expr)``
            when compiling or evaluating the expression fails.
    """
    if not xpath_expr:
        return None
    try:
        finder = XPath(xpath_expr, namespaces=REGEXPNAMESPACE)
        return finder(node)
    except Exception as ex:  # "except Exception, ex" is invalid on Python 3
        raise Exception((ex, xpath_expr))
Beispiel #26
0
def get_xpath(xpath_spec):
    """Return a compiled XPath for ``xpath_spec``, using a cache for strings.

    There is no thread lock: in the worst case the same expression is
    compiled more than once.

    Args:
        * xpath_spec (str|lxml.etree.XPath): XPath as a str or an already
          compiled lxml.etree.XPath

    Returns:
        * result (lxml.etree.XPath): the compiled expression; an XPath
          instance passed in is returned unchanged.

    Raises:
        * TypeError: Raise when xpath_spec is neither a str nor a lxml.etree.XPath
        * SearxXPathSyntaxException: Raise when there is a syntax error in the XPath
    """
    if isinstance(xpath_spec, XPath):
        # Already compiled, nothing to do.
        return xpath_spec

    if not isinstance(xpath_spec, str):
        raise TypeError('xpath_spec must be either a str or a lxml.etree.XPath')

    compiled = xpath_cache.get(xpath_spec, None)
    if compiled is not None:
        return compiled

    try:
        compiled = XPath(xpath_spec)
    except XPathSyntaxError as e:
        raise SearxXPathSyntaxException(xpath_spec, str(e.msg)) from e
    xpath_cache[xpath_spec] = compiled
    return compiled
Beispiel #27
0
    def accept_expr(self):
        """Validate the entered selector expression and store the dialog state.

        The expression is compiled as a CSS selector when ``rb1`` is
        checked and as XPath otherwise. On success the compiled selector
        is stored in ``self.state['select']`` and the dialog is closed; on
        an empty or invalid expression a warning box is shown and the
        input gets the focus back. The remaining state entries (``numfmt``,
        ``only_modify_text``, ``unique_strategy``) are stored in every
        case.

        Only the compilation is guarded, so an error raised while closing
        the dialog is no longer reported as a bad expression.
        """
        method = 'csssel' if self.rb1.isChecked() else 'xpath'
        expr = self.le_expr.text().strip()
        if not expr:
            # "Warning" / "the expression must not be empty"
            QMessageBox.warning(self, "警告", '表达式不可为空', QMessageBox.Cancel)
            self.le_expr.setFocus()
        elif method == 'csssel':
            try:
                selector = CSSSelector(expr)
            except Exception:
                # "Warning" / "invalid CSS selector expression"
                QMessageBox.warning(self, "警告", '错误的 CSS选择器 表达式',
                                    QMessageBox.Cancel)
                self.le_expr.setFocus()
            else:
                self.state['select'] = selector
                self.close()
        elif method == 'xpath':
            try:
                selector = XPath(expr)
            except Exception:
                # "Warning" / "invalid XPath expression"
                QMessageBox.warning(self, "警告", '错误的 XPath 表达式',
                                    QMessageBox.Cancel)
                self.le_expr.setFocus()
            else:
                self.state['select'] = selector
                self.close()
        else:
            # Report the offending method, not the expression.
            raise NotImplementedError('unsupported method %r' % method)

        self.state['numfmt'] = self.le_numfmt.text()

        self.state['only_modify_text'] = self.cb1.isChecked()
        self.state['unique_strategy'] = 'inepub' if self.cb2.isChecked(
        ) else 'inhtml'
def evaluateXPath(path, element):
    """Evaluate the XPath expression ``path`` against ``element``.

    ``xml.xpath.Evaluate`` is used when it can be imported; on ImportError
    the function falls back to lxml.

    Returns:
        A boolean when the expression evaluates to a bool; otherwise a
        list. With ``xml.xpath`` attribute nodes in the list are replaced
        by their values; with lxml, strings are kept and every other
        result item is re-parsed through ``parseXMLString``. A single
        non-iterable result is wrapped in a one-element list.
    """
    try:
        import xml.dom
        from xml.xpath import Evaluate
        result=Evaluate(path, element)
        if hasattr(result,'__iter__'):
            # Replace attribute nodes in place by their string value.
            for i in range(len(result)):
                if isinstance(result[i], xml.dom.Node) and result[i].nodeType == xml.dom.Node.ATTRIBUTE_NODE:
                    result[i]=result[i].value
        elif type(result)==bool:
            return result
        else:
            # Scalar result: wrap it so callers get a list.
            result=[result]
        return result
    except ImportError:
        # Implementation for etree
        from lxml.etree import XPath, fromstring, tounicode
        # The element is serialized with toPrettyXML and re-parsed by lxml
        # before the expression is evaluated.
        # NOTE(review): presumably toPrettyXML returns the XML text of a DOM
        # element - confirm against its definition.
        buf=toPrettyXML(element)
        elist=XPath(path).evaluate(fromstring(buf))
        nodelist=list()
        # Iterable result: convert item by item.
        if hasattr(elist,'__iter__'):
            for eelement in elist:
                # The result list holds either strings or elements.
                if isinstance(eelement, basestring):
                    nodelist.append(eelement)
                else:
                    # Serialize the lxml element, parse it again with
                    # parseXMLString and keep its document element.
                    nodelist.append(parseXMLString(tounicode(eelement)).documentElement)
        elif type(elist)==bool:
            return elist
        else:
            nodelist.append(elist)
        return nodelist
Beispiel #29
0
def selector_converter(selector):
    r'''
    Create a selector out of a string or number. If the input is a string,
    a CSS or XPath selector is created; if the input is a number or a
    tuple/list of numbers, the selector is turned into a slice selector.
    An existing CSSSelector is returned unchanged.

    A string is first compiled as a CSS selector; when that fails with a
    selector syntax error it is compiled as XPath instead.

    NOTE: grouping of the form ``(\w+ > \w+) ~ \w+`` (sibling selection of
    the parent element of the parenthesized part) is not implemented; the
    earlier draft of it was disabled and never executed.

    Returns None for a falsy selector (None, '', 0, empty list/tuple).

    Raises Exception when the CSS compilation fails with anything other
    than a selector syntax error.
    '''
    if not selector:
        return None
    # bool is excluded so that True is not mistaken for an index.
    if isinstance(selector, int) and not isinstance(selector, bool):
        return SliceSelector((selector, ))
    if isinstance(selector, (list, tuple)):
        return SliceSelector(selector)
    if isinstance(selector, lxml.cssselect.CSSSelector):
        return selector
    try:
        return lxml.cssselect.CSSSelector(selector)
    except lxml.cssselect.SelectorSyntaxError:
        # Not valid CSS: treat the string as an XPath expression.
        return XPath(selector)
    except Exception:
        raise Exception('This value for a selector was not understood',
                        selector)
Beispiel #30
0
 def compile_selector(self, expr, default_type):
     """
     Compile one selector string into ``(selector_type, selector_object,
     expression_string, attributes)``.

     ``selector_type`` is a string (``"elements"``, ``"children"``, etc),
     ``selector_object`` is a callable that returns elements,
     ``expression_string`` is the original expression as passed in, and
     ``attributes`` is the list of attributes in the case of
     ``attributes(attr1, attr2):``.

     The part of the expression after the prefix is compiled as XPath when
     it starts with ``/`` and as a CSS selector otherwise.

     Raises ``DeliveranceSyntaxError`` when the type is incompatible with
     the type already declared for this selector, or when the CSS selector
     cannot be compiled.
     """
     selector_type, attributes, rest_expr = self.parse_prefix(
         expr, default_type=default_type)
     if not self.types_compatible(selector_type, self.major_type):
         raise DeliveranceSyntaxError(
             "Expression %s in selector %r uses the type %r, but this is not "
             "compatible with the type %r already declared earlier in the selector"
             % (expr, self, selector_type, self.major_type))

     if not rest_expr.startswith('/'):
         try:
             compiled = CSSSelector(rest_expr)
         except AssertionError as e:
             raise DeliveranceSyntaxError('Bad CSS selector: "%s" (%s)' %
                                          (expr, e))
     else:
         compiled = XPath(rest_expr)
     return (selector_type, compiled, expr, attributes)