from mwclient import Site

# Language code used as the Wikipedia sub-domain to connect to.
language = 'is'

# Open the wiki for that sub-domain and request its page listing.
SITE = Site(f'{language}.wikipedia.org')
ALL_PAGES = SITE.allpages()

# Emit one page name per line, in the order the listing yields them.
for page in ALL_PAGES:
    print(page.name)
import re

from simple_salesforce import Salesforce

# Scan every page of the internal support wiki, pick out the pages whose name
# starts with "Bug"/"bug", and pull the bug number, subject and case IDs out
# of the title and text.

# NOTE(review): ConfluenceAPI is not imported anywhere in the visible source;
# unless it is provided elsewhere this line raises NameError. `api` is also
# unused in the visible code.
# NOTE(review): credentials and the Salesforce security token below are
# hard-coded in source; move them to configuration / environment and rotate
# the token.
api = ConfluenceAPI('admin', '123@qwe', 'http://127.0.0.1:8090')

# NOTE(review): FileOut is opened for writing but is neither written to nor
# closed in the visible code; close it (or use `with`) where the script is
# finished with it.
FileOut = open('PagesBUSList.txt', 'w')

UserAgent = 'Wiki_parser/0.1 run by DremSama'
site = Site(('http', 'wiki.support.veeam.local'), path='/',
            clients_useragent=UserAgent)
sf = Salesforce(username='******', password='******',
                security_token='dNr44yHsFXaSuRmKXunWPlzS')

PagesList = []
PagesBUGSList = []

for page in site.allpages():
    PagesList.append(page)

    # Only pages named "Bug..." / "bug..." are of interest.
    if not page.name.startswith(('Bug', 'bug')):
        continue

    print('----------------------------------')

    # Title layout: "<B|b>ug" + one separator character + digits, then
    # optional dashes / whitespace, then the subject.
    # The first class used to be `[A-z,a-z]`, which also accepted
    # `[ \ ] ^ _` and a comma; it is now limited to the letters intended.
    # (`|` inside `[-|\s]` is a literal pipe, kept as in the original.)
    BugID = re.match(r'[Bb]ug\b.(\d*)[-|\s]*(.*)', page.page_title)
    if BugID:
        BugID_NUM = BugID.group(1)
        BugID_SUBJECT = BugID.group(2)

    # NOTE(review): nesting was reconstructed from whitespace-collapsed
    # source; confirm the text checks below are meant to run even when the
    # title did not match the pattern above.
    # The positional 0 is presumably mwclient's `section` argument (lead
    # section only) - confirm.
    textALL = page.text(0)
    if not textALL:
        print(f'ERROR: Page "{page.name}" has no text')
    elif textALL.startswith('#REDIRECT'):
        print(f'Page "{page.name}" is only a redirect page, skipping')
    else:
        print(page.name)
        # Every run of digits that follows the bold "Case ID: " label.
        BugCaseID = re.findall(r"'''Case ID: '''(\d*)", textALL)
# Wiki-markup bracket characters removed from every description line.
_MARKUP_CHARS = str.maketrans('', '', '[]{}')

# Description lines that start with any of these prefixes are left out of
# the collected entry.
_SKIPPED_PREFIXES = (
    'Description', '===', 'TripReports', 'RelatedDiscussions', '*',
    'Source', '(', '=', '<')


def _float_after(pattern, text, failure_message):
    """Return the text matched by *pattern* in *text* as a float.

    Returns 0.0 when the pattern does not match. When it matches but the
    matched text is not a valid float, prints *failure_message* and
    returns 0.0.
    """
    found = re.search(pattern, text)
    if not found:
        return 0.0
    try:
        return float(found.group())
    except ValueError:
        print(failure_message)
        return 0.0


def _description_entry(page_text):
    """Return the cleaned lines of the Description section, or ''.

    The section is the span from 'Description ===' up to the next '==='
    that is followed by whitespace. Each kept line is prefixed with a
    newline, so a non-empty result starts with a newline.
    """
    # DOTALL lets `.` cross line breaks. It replaces an inline `(?s)` that
    # sat in the middle of the pattern, which Python 3.11+ rejects with
    # re.error. (The former MULTILINE flag had no effect: no ^ or $.)
    section = re.search(r'Description ===(.*?)===\s', page_text, re.DOTALL)
    if not section:
        return ""

    kept = []
    for raw_line in section.group().splitlines():
        # Strip the markup first: a line made only of brackets becomes
        # empty and is dropped by the truthiness check.
        line = raw_line.translate(_MARKUP_CHARS)
        if line and not line.startswith(_SKIPPED_PREFIXES):
            kept.append("\n" + line)
    return "".join(kept)


def Hikes():
    """Collect Portland-area hikes from the oregonhikers.org wiki.

    Walks ``site.allpages()`` and looks only at pages whose text contains
    both '[[Category:Hikes]]' and '[[Category:Portland Area]]'. For each
    such page it reads latitude, longitude, distance and difficulty from
    the page text and gathers the cleaned Description section.

    The walk stops as soon as the 30th matching page is reached; that
    page itself is not processed.

    Returns:
        A list of ``Hike`` objects, one for every processed page that had
        a non-empty description. Each has ``desc``, ``name``,
        ``distance`` and ``difficulty`` set in addition to the latitude
        and longitude passed to the constructor.
    """
    site = Site(('http', 'www.oregonhikers.org/'))
    counter = 0
    hikes = []

    for item in site.allpages():
        page_text = item.text()
        if ('[[Category:Hikes]]' not in page_text
                or '[[Category:Portland Area]]' not in page_text):
            continue

        counter += 1
        if counter == 30:
            break

        # The 6 / 7 characters that follow "latitude=" / "longitude=".
        lat = _float_after(r'(?<=latitude=).{6}', page_text, "no lat given")
        lon = _float_after(r'(?<=longitude=).{7}', page_text,
                           "no long given")
        # Text between "Distance|" and "miles}}".
        distance = _float_after(r'(?<=Distance\|)(.*)(?=miles}})', page_text,
                                "invalid distance")

        # Text between "Difficulty|" and "}}"; kept as a string.
        rated = re.search(r'(?<=Difficulty\|)(.*)(?=}})', page_text)
        difficulty = rated.group() if rated else "None given"

        hike = Hike(lat, lon)
        entry = _description_entry(page_text)
        hike.desc = entry
        hike.name = item.page_title
        hike.distance = distance
        hike.difficulty = difficulty

        # Pages without any usable description text are not returned.
        if entry:
            hikes.append(hike)
            print("added: " + hike.name)
            print("lat: " + str(hike.lat))
            print("long: " + str(hike.long))

    return hikes