Example #1
0
 def __convert_drug(self, node):
     """Build a ``Drug`` from ``node`` and print it if it has an FDA product id.

     ``name``, ``indication``, ``fda_product_id`` and ``wikipedia`` are each
     filled from an XPath lookup relative to ``node``.  Nothing is printed
     when the product id lookup gives a falsy value.  Always returns ``None``.
     """
     drug = Drug()
     lookup = xpath.findvalue
     drug.name = lookup('name', node)
     drug.indication = lookup('indication', node)
     drug.fda_product_id = lookup(
         'external-identifiers/external-identifier[starts-with(resource, "National Drug Code Directory")]/identifier',
         node)
     drug.wikipedia = lookup(
         'external-links/external-link[starts-with(resource, "Wikipedia")]/url',
         node)
     # Only drugs that carry a product id are reported.
     if drug.fda_product_id:
         print(str(drug))
 def parse_response(self, resp):
     """Parse ``resp`` and read the status and data value counters from it.

     ``resp`` is handed to ``parseString``; the status is taken from the
     first ``status`` element and the counters from the ``imported``,
     ``ignored`` and ``updated`` attributes of ``//dataValueCount[1]``.

     Returns ``(False, message)`` when any exception is raised while parsing.

     NOTE(review): in the code visible here the success path ends without a
     ``return`` and ``resp_dict`` is never read -- the snippet looks
     truncated; confirm against the full source.
     """
     # Zeroed counters; replaced below once the response has been parsed.
     resp_dict = {"imported": 0, "ignored": 0, "updated": 0}
     try:
         doc = parseString(resp)
         status = xpath.findvalue('//status', doc)
         imported = xpath.findvalue('//dataValueCount[1]/@imported', doc)
         ignored = xpath.findvalue('//dataValueCount[1]/@ignored', doc)
         updated = xpath.findvalue('//dataValueCount[1]/@updated', doc)
         #conflicts = xpath.find('//conflict', doc)
         resp_dict = {"status": status, "imported": imported, "ignored": ignored, "updated": updated}
     except Exception, e:
         # Any failure is reported to the caller as a (False, text) pair.
         return False, "%s" % str(e)
Example #3
0
File: fifa.py Project: xczimi/xpool
def get_games(stage):
    """Return one dict per match row found in the group tables of ``stage``.

    Every entry of ``get_dom(stage)`` is treated as one group table: its
    ``caption`` gives the group name and each ``tbody/tr`` row becomes a dict
    with keys ``id``, ``group``, ``time``, ``location``, ``home_team`` and
    ``away_team``.
    """
    games = []
    for groupdom in get_dom(stage):
        group_name = xpath.findvalue('caption', groupdom)
        for match in xpath.find('tbody/tr', groupdom):
            game_id = xpath.findvalue('td[contains(@class,"mNum")]', match)
            # The time text is the part after the first comma of the title.
            title = xpath.findvalue('td[contains(@class,"dt")]/span/@title',
                                    match)
            kickoff = timestring_to_datetime(
                title.split(",")[1].encode('utf-8'))
            venue = xpath.findvalue('td/a[contains(@href,"destination")]',
                                    match)
            game = {
                'id': game_id,
                'group': group_name,
                'time': kickoff,
                'location': venue.strip(),
            }
            for side in ('home', 'away'):
                game[side + '_team'] = get_team(match, side)
            games.append(game)
    return games
Example #4
0
    def test_render(self):
        """Rendered widget keeps the value, has the class and is not readonly."""
        class F(Form):
            fields = [
                Field('name',
                      conv=convs.Char(),
                      widget=self.widget(classname="cls")),
            ]

        widget = F(self.env).get_field('name').widget
        html = self.parse(widget.render('<p>Paragraph</p>'))

        self.assertEqual(self.get_value(html), '<p>Paragraph</p>')

        readonly = xpath.findvalue('.//*:%s/@readonly'%self.tag, html)
        self.assertEqual(readonly, None)

        css_class = xpath.findvalue('.//*:%s/@class'%self.tag, html)
        self.assertEqual(css_class, 'cls')
 def parse_response(self, resp):
     """Parse ``resp`` and read the status and data value counters from it.

     ``resp`` is handed to ``parseString``; the status is taken from the
     first ``status`` element and the counters from the ``imported``,
     ``ignored`` and ``updated`` attributes of ``//dataValueCount[1]``.

     Returns ``(False, message)`` when any exception is raised while parsing.

     NOTE(review): in the code visible here the success path ends without a
     ``return`` and ``resp_dict`` is never read -- the snippet looks
     truncated; confirm against the full source.
     """
     # Zeroed counters; replaced below once the response has been parsed.
     resp_dict = {"imported": 0, "ignored": 0, "updated": 0}
     try:
         doc = parseString(resp)
         status = xpath.findvalue('//status', doc)
         imported = xpath.findvalue('//dataValueCount[1]/@imported', doc)
         ignored = xpath.findvalue('//dataValueCount[1]/@ignored', doc)
         updated = xpath.findvalue('//dataValueCount[1]/@updated', doc)
         #conflicts = xpath.find('//conflict', doc)
         resp_dict = {
             "status": status,
             "imported": imported,
             "ignored": ignored,
             "updated": updated
         }
     except Exception, e:
         # Any failure is reported to the caller as a (False, text) pair.
         return False, "%s" % str(e)
Example #6
0
 def getValue(self, location, context=None):
     """Evaluate ``location`` and return the value found by ``xpath.findvalue``.

     The lookup runs inside ``with self:``.  When ``context`` is ``None`` the
     document element of ``self._doc`` is used as the context node; the
     chosen node is also passed on as ``originalContext``.
     """
     with self:
         node = self._doc.documentElement if context is None else context
         return xpath.findvalue(location, node, originalContext=[node])
Example #7
0
File: fifa.py Project: xczimi/xpool
def get_team(match, home_or_away):
    """Describe the home or away side of a match row.

    When the team cell contains a link, the result has ``name``, ``flag``
    and ``href``.  Otherwise it has a single ``reference`` entry: a
    ``{'rank', 'game_ref'}`` dict for cell texts such as ``1A``, ``W12`` or
    ``L12``, or the raw cell text when none of those patterns match.
    """
    cell_xpath = 'td[contains(@class,"'+home_or_away+'Team")]'
    team_href = xpath.findvalue(cell_xpath+'/a/@href',match)
    label = xpath.findvalue(cell_xpath, match)

    if team_href is not None:
        # A linked cell is a concrete team.
        return {
            'name': label,
            'flag': xpath.findvalue('td/a[@href="'+team_href+'"]/img/@src',match),
            'href': team_href,
        }

    # No link: the cell refers to the outcome of a group or an earlier game.
    if re.match(r'^[12][A-H]$', label):
        reference = {'rank': int(label[0]), 'game_ref': "Group "+label[1]}
    elif re.match(r'W([0-9]+)$', label):
        reference = {'rank': 1, 'game_ref': "KO "+label[1:]}
    elif re.match(r'L([0-9]+)$', label):
        reference = {'rank': 2, 'game_ref': "KO "+label[1:]}
    else:
        reference = label
    return {'reference': reference}
Example #8
0
File: uefa.py Project: xczimi/xpool
def get_games(stage):
    """Return one dict per match found in ``get_dom(stage)``.

    Each dict has keys ``id``, ``group``, ``time``, ``location``,
    ``home_team`` and ``away_team``.  If a match has neither a kick-off cell
    nor a match link, its markup is printed and an empty list is returned,
    dropping any games collected so far.
    """
    # Tried in order; the first one that yields a value names the group.
    group_xpaths = (
        'tr/td/div/span[@class="gname"]//a',
        'tr/td/div/span[@class="rname"]//a',
    )
    games = []
    for matchdom in get_dom(stage):
        group_name = ""
        for group_xpath in group_xpaths:
            label = xpath.findvalue(group_xpath, matchdom)
            if label is not None:
                group_name = label.strip(' \t\n\r')
                break

        # The day of month is the first space-separated token of the date.
        date_text = xpath.findvalue('tr/td/div/span[@class="b dateT"]',
                                    matchdom)
        dayvalue = date_text.strip().split(' ')[0].encode('utf-8')

        # Prefer the linked kick-off text, fall back to the bare cell.
        hourstr = xpath.findvalue('tr/td[@class="c b score nob"]//a', matchdom)
        if hourstr is None:
            hourstr = xpath.findvalue('tr/td[@class="c b score nob"]',
                                      matchdom)
        if hourstr is None:
            print(matchdom.toprettyxml(encoding='utf-8'))
            return []
        hourvalue = hourstr.strip().encode('utf-8')
        clock = hourvalue.split('.')
        matchtime = datetime(2012, 6, int(dayvalue),
                             int(clock[0]) - 2, int(clock[1]), 0, 0)

        # The stadium is whatever follows the last comma after "Stadium:".
        referee_cell = xpath.findvalue('tr[@class="referee_stadium"]/td',
                                       matchdom)
        stadium_match = re.match(r".*Stadium:.*,(.*)",
                                 referee_cell.encode('utf-8').strip(),
                                 flags=re.DOTALL)
        stadium = stadium_match.group(1).strip()

        hournode = xpath.findvalue('tr/td[@class="c b score nob"]//a/@href',
                                   matchdom)
        if hournode is None:
            print(matchdom.toprettyxml(encoding='utf-8'))
            return []
        id_match = re.match(
            r"/uefaeuro/season=2012/matches/round=[0-9]+/match=([0-9]+)/index.html",
            hournode)
        matchid = int(id_match.group(1)) - 2003318

        game = {
            'id': matchid,
            'group': group_name,
            'time': matchtime,
            'location': stadium,
        }
        for side in ('home', 'away'):
            game[side + '_team'] = get_team(matchdom, side)
        games.append(game)
    return games
Example #9
0
File: uefa.py Project: xczimi/xpool
def get_team(match, home_or_away):
    """Describe the home or away side of a match.

    When the team cell contains a link, the result has ``name``, ``flag``
    and ``href``.  Otherwise it has a single ``reference`` entry: a
    ``{'rank', 'game_ref'}`` dict for cell texts such as ``1A``, ``W12`` or
    ``L12``, or the stripped cell text when none of those patterns match.
    """
    cell_xpath = 'tr/td[contains(@class,"' + home_or_away + '")]'
    team_href = xpath.findvalue(cell_xpath + '/a/@href', match)
    cell_text = xpath.findvalue(cell_xpath, match).strip()

    team = {}
    if team_href is None:
        # No link: the cell refers to the outcome of a group or earlier game.
        if re.match(r'^[12][A-H]$', cell_text):
            team['reference'] = {
                'rank': int(cell_text[0]),
                'game_ref': "Group " + cell_text[1],
            }
        elif re.match(r'W([0-9]+)$', cell_text):
            team['reference'] = {'rank': 1, 'game_ref': "KO " + cell_text[1:]}
        elif re.match(r'L([0-9]+)$', cell_text):
            team['reference'] = {'rank': 2, 'game_ref': "KO " + cell_text[1:]}
        else:
            team['reference'] = cell_text
    else:
        team['name'] = cell_text
        team['flag'] = xpath.findvalue(
            '//tr/td/a[@href="' + team_href + '"]/img/@src', match)
        team['href'] = team_href
    return team
Example #10
0
def get_games(stage):
    """Return one dict per match row found in the group tables of ``stage``.

    Every entry of ``get_dom(stage)`` is treated as one group table: its
    ``caption`` gives the group name and each ``tbody/tr`` row becomes a dict
    with keys ``id``, ``group``, ``time``, ``location``, ``home_team`` and
    ``away_team``.
    """
    collected = []
    for groupdom in get_dom(stage):
        group_name = xpath.findvalue('caption', groupdom)
        rows = xpath.find('tbody/tr', groupdom)
        for match in rows:
            number = xpath.findvalue('td[contains(@class,"mNum")]', match)
            # The time text is the part after the first comma of the title.
            title = xpath.findvalue('td[contains(@class,"dt")]/span/@title',
                                    match)
            when = timestring_to_datetime(title.split(",")[1].encode('utf-8'))
            where = xpath.findvalue('td/a[contains(@href,"destination")]',
                                    match).strip()
            entry = {'id': number, 'group': group_name,
                     'time': when, 'location': where}
            for home_or_away in ('home', 'away'):
                entry[home_or_away + '_team'] = get_team(match, home_or_away)
            collected.append(entry)
    return collected
Example #11
0
    def test_render_readonly(self):
        """A field with ``permissions="r"`` renders its raw value as readonly."""
        class F(Form):
            fields = [
                Field('name',
                      conv=convs.Char(),
                      widget=self.widget(),
                      permissions="r"),
            ]

        form = F(self.env)
        form.raw_data = MultiDict({'name': '<p>Paragraph</p>'})

        rendered = form.get_field('name').widget.render()
        html = self.parse(rendered)

        self.assertEqual(self.get_value(html), '<p>Paragraph</p>')

        readonly = xpath.findvalue('.//*:%s/@readonly'% self.tag, html)
        self.assertEqual(readonly, 'readonly')
Example #12
0
def extractMenu(doc):
    """Collect five lunch menu texts from the ``menyn`` div of ``doc``.

    ``font`` elements at positions 2..19 are probed in order.  A text longer
    than seven characters counts as a menu; a weekday name at its very end is
    removed before it is stored.  Probing stops once five menus are found.

    Returns:
        A list with the five menu texts, in document order.

    Raises:
        LunchrParseException: if the ``menyn`` div is missing, or if fewer
            than five menus were found among the probed positions.
    """
    weekdays = ["Måndag", "Tisdag", "Onsdag", "Torsdag", "Fredag", "Lördag", "Söndag"]
    wanted = 5

    menudoc = xpath.find("//div[@class='menyn']", doc)
    if not menudoc:
        # Previously this surfaced as an IndexError on menudoc[0].
        raise LunchrParseException("Couldn't find any lunch menu.")

    # Loop-invariant: pattern for a weekday name at the end of the text.
    trailing_weekday = r'(' + '|'.join(weekdays) + ')$'

    menu = []
    for position in range(2, 20):
        if len(menu) >= wanted:
            break
        weekmenu = xpath.findvalue("//font[$day]", menudoc[0], day=position)
        if weekmenu and len(weekmenu) > 7:
            menu.append(re.sub(trailing_weekday, '', weekmenu))

    # Judge by what was found, not by the probe counter: the old ``c == 20``
    # test also fired when the fifth menu sat at the last probed position.
    if len(menu) < wanted:
        raise LunchrParseException("Couldn't find any lunch menu.")

    return menu
Example #13
0
File: uefa.py Project: xczimi/xpool
def get_games(stage):
    """Return one dict per match found in ``get_dom(stage)``.

    Each dict has keys ``id``, ``group``, ``time``, ``location``,
    ``home_team`` and ``away_team``.  If a match has neither a kick-off cell
    nor a match link, its markup is printed and an empty list is returned,
    dropping any games collected so far.
    """
    results = []
    for matchdom in get_dom(stage):
        # Group label: "gname" span first, then "rname", else empty.
        group_name = xpath.findvalue('tr/td/div/span[@class="gname"]//a', matchdom)
        if group_name is None:
            group_name = xpath.findvalue('tr/td/div/span[@class="rname"]//a', matchdom)
        group_name = "" if group_name is None else group_name.strip(' \t\n\r')

        # The day of month is the first space-separated token of the date.
        date_span = xpath.findvalue('tr/td/div/span[@class="b dateT"]', matchdom)
        dayvalue = date_span.strip().split(' ')[0].encode('utf-8')

        # Kick-off text: linked form first, bare cell as fallback.
        kickoff_text = xpath.findvalue('tr/td[@class="c b score nob"]//a', matchdom)
        if kickoff_text is None:
            kickoff_text = xpath.findvalue('tr/td[@class="c b score nob"]', matchdom)
            if kickoff_text is None:
                print(matchdom.toprettyxml(encoding='utf-8'))
                return []
        hourvalue = kickoff_text.strip().encode('utf-8')
        hour_minute = hourvalue.split('.')
        matchtime = datetime(
            2012, 6, int(dayvalue),
            int(hour_minute[0]) - 2,
            int(hour_minute[1]),
            0, 0)

        # The stadium is whatever follows the last comma after "Stadium:".
        venue_text = xpath.findvalue('tr[@class="referee_stadium"]/td', matchdom)
        venue_text = venue_text.encode('utf-8').strip()
        stadium = re.match(r".*Stadium:.*,(.*)", venue_text, flags = re.DOTALL).group(1).strip()

        match_link = xpath.findvalue('tr/td[@class="c b score nob"]//a/@href', matchdom)
        if match_link is None:
            print(matchdom.toprettyxml(encoding='utf-8'))
            return []
        raw_id = re.match(r"/uefaeuro/season=2012/matches/round=[0-9]+/match=([0-9]+)/index.html", match_link).group(1)
        matchid = int(raw_id) - 2003318

        new_game = {}
        new_game['id'] = matchid
        new_game['group'] = group_name
        new_game['time'] = matchtime
        new_game['location'] = stadium
        for home_or_away in ('home', 'away'):
            new_game[home_or_away + '_team'] = get_team(matchdom, home_or_away)
        results.append(new_game)
    return results
Example #14
0
 def parse(self, node):
     """Interpret the value selected by ``self.selector`` as a boolean.

     Returns ``True`` when the selected text, lower-cased, is one of
     ``true``, ``1``, ``yes`` or ``y``; ``False`` otherwise, including when
     the selector yields no value.
     """
     selection = xpath.findvalue(self.selector, node)
     if selection is None:
         # No value selected: report False instead of failing on
         # ``None.lower()``.
         return False
     return selection.lower() in ("true", "1", 'yes', 'y')
Example #15
0
 def get_options(self, html):
     """Return a ``(value, label, checked)`` tuple for every input in ``html``.

     ``value`` is the input's ``value`` attribute, ``label`` is the label
     text looked up from the input's parent node, and ``checked`` tells
     whether the input has a ``checked`` attribute.
     """
     options = []
     for input_node in xpath.find('.//*:input', html):
         value = input_node.getAttribute('value')
         label = xpath.findvalue('./*:label/text()', input_node.parentNode)
         checked = input_node.hasAttribute('checked')
         options.append((value, label, checked))
     return options
Example #16
0
 def get_value(self, html):
     """Return the ``value`` attribute of the ``self.tag`` element in ``html``."""
     value_xpath = './/*:%s/@value'%self.tag
     return xpath.findvalue(value_xpath, html)
Example #17
0
 def check_multiple(self, html):
     """Assert that the input found in ``html`` has type ``checkbox``."""
     input_type = xpath.findvalue('.//*:input/@type', html)
     self.assertEqual(input_type, 'checkbox')
# Stream the XML file named by NLWIKI_FILE as pulldom events.
events = pulldom.parse(NLWIKI_FILE)

nltk_stopwords = nltk.corpus.stopwords.words('dutch')

# Stop words: one per line from the local file, merged with NLTK's Dutch list.
with open("data/stopwords.txt") as f:
    stopwords = f.readlines()
    stopwords = set([x.strip() for x in stopwords])

stopwords.update(nltk_stopwords)

for event, node in events:
    if event == 'START_ELEMENT' and node.tagName == 'page':
        # NOTE(review): `x` is not initialised in the visible code --
        # presumably a page counter defined earlier in the file; confirm.
        x += 1

        events.expandNode(node)  # node now contains a dom fragment
        title = xpath.findvalue('title', node)
        # Drop text running from a "(" or "|" up to the next ")", then
        # trim and lower-case the title.
        title = re.sub("[\(|].*?[\)]", "", title).strip().lower()
        # Skip multi-word titles and titles containing any bad_list entry.
        # NOTE(review): `bad_list` is defined outside the visible code.
        if len(title.split()) > 1 or any(bad in title for bad in bad_list):
            continue
        # Keep only word characters in the title.
        title = re.sub(r'[\W]+', "", title)

        # NOTE(review): `revision` is not read again in the visible code.
        revision = xpath.findvalue('revision', node)
        text = xpath.findvalues('revision/text', node)
        # Work on the first section of the first revision text only.
        # NOTE(review): `wtp` is presumably the wikitextparser module; confirm.
        wiki_parsed = wtp.parse(text[0]).sections[0]
        wiki_parsed_str = str(wiki_parsed)
        # Remove the source text of every table, template and tag from the
        # section string.
        for table in wiki_parsed.tables:
            wiki_parsed_str = wiki_parsed_str.replace(str(table), "")
        for tmpl in wiki_parsed.templates:
            wiki_parsed_str = wiki_parsed_str.replace(str(tmpl), "")
        for ref in wiki_parsed.get_tags():
            wiki_parsed_str = wiki_parsed_str.replace(str(ref), '')
Example #19
0
 def test_compiled_expr_argument(self):
     """``findvalue`` accepts a pre-compiled ``xpath.XPath`` expression."""
     expr = xpath.XPath('//item[3]')
     result = xpath.findvalue(expr, self.doc)
     # assertEqual replaces the deprecated failUnlessEqual alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(result, 'parrot')
Example #20
0
 def parse(self, node):
     """Return the value that ``self.selector`` selects relative to ``node``."""
     selected = xpath.findvalue(self.selector, node)
     return selected
Example #21
0
 def check_not_multiple(self, html):
     """Assert that the select found in ``html`` has no ``multiple`` attribute."""
     multiple = xpath.findvalue('.//*:select/@multiple', html)
     self.assertEqual(multiple, None)
Example #22
0
def _findvalue(root, xpath_expr):
    """Call ``xpath.findvalue`` with the arguments in node-first order."""
    value = xpath.findvalue(xpath_expr, root)
    return value
Example #23
0
 def test_compiled_expr_argument(self):
     """``findvalue`` accepts a pre-compiled ``xpath.XPath`` expression."""
     compiled = xpath.XPath('//item[3]')
     self.assertEqual(xpath.findvalue(compiled, self.doc), 'parrot')
Example #24
0
 def check_not_multiple(self, html):
     """Assert that the input found in ``html`` has type ``radio``."""
     input_type = xpath.findvalue('.//*:input/@type', html)
     self.assertEqual(input_type, 'radio')