def wik(phenny, input): """.wik <term> - Look up something on Wikipedia.""" origterm = input.groups()[1] if not origterm: return phenny.say('Perhaps you meant ".wik Zen"?') term = web.unquote(origterm) term = term[0].upper() + term[1:] term = term.replace(' ', '_') w = wiki.Wiki(wikiapi, wikiuri, wikisearch) try: result = w.search(term) except web.ConnectionError: error = "Can't connect to en.wikipedia.org ({0})".format( wikiuri.format(term)) return phenny.say(error) if result is not None: phenny.say(result) else: phenny.say( 'Can\'t find anything in Wikipedia for "{0}".'.format(origterm))
def wikipedia(phenny, input, origterm, lang, to_user=None):
    origterm = origterm.strip()
    lang = lang.strip()

    if not origterm:
        return phenny.say('Perhaps you meant ".wik Zen"?')

    section = None
    if "#" in origterm:
        origterm, section = origterm.split("#")[:2]
        section = format_subsection(section)

    term = format_term(origterm)

    w = wiki.Wiki(wikiapi % lang, wikiuri % lang, wikisearch % lang)

    try:
        result = w.search(term)
    except web.ConnectionError:
        error = "Can't connect to en.wikipedia.org ({0})".format(
            wikiuri.format(term))
        return phenny.say(error)

    if result is not None:
        # Disregarding [0], the snippet
        url = result.split("|")[-1]
        check_posted(phenny, input, url)
        if to_user:
            phenny.say(to_user + ', ' + parse_wiki_page(url, term, section))
        else:
            phenny.say(parse_wiki_page(url, term, section))
    else:
        phenny.say(
            'Can\'t find anything in Wikipedia for "{0}".'.format(origterm))
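# The format_term and format_subsection helpers called above are defined
# elsewhere in the module and are not part of this snippet. A minimal sketch
# of what they might do, assuming they only normalize a phrase into MediaWiki
# title/anchor form the same way the .wik handler does inline (capitalize the
# first letter, turn spaces into underscores); names and behaviour here are
# assumptions, not the project's actual code.
def format_term(term):
    term = term.strip()
    term = term[0].upper() + term[1:]   # MediaWiki capitalizes the first letter of a title
    return term.replace(' ', '_')


def format_subsection(section):
    # Section anchors keep their own capitalization but use underscores for spaces.
    return section.strip().replace(' ', '_')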
def get_candidates(qatp):

    print 'loading data...'
    idf = pkl.load(open(prm.idf_path, "rb"))
    wk = wiki.Wiki(prm.pages_path)

    print 'creating vocabulary...'
    vocab = {}
    for q, _, _, _ in qatp:
        words = wordpunct_tokenize(q.lower())
        for word in words:
            if word in idf:
                vocab[word] = {}

    print 'creating inverted index...'
    i = 0
    for text in wk.get_text_iter():
        if i % 10000 == 0:
            print 'article', i
        words = wordpunct_tokenize(text.lower())
        for word in words:
            if word in vocab:
                vocab[word][i] = 0
        #if i > 500000:
        #    break
        i += 1

    print 'selecting pages...'
    candidates = []
    for i, [q, _, _, _] in enumerate(qatp):
        st = time.time()
        words = wordpunct_tokenize(q.lower())
        scores = {}
        for word in words:
            if word in vocab:
                if len(vocab[word]) < 100000:
                    for pageid in vocab[word].keys():
                        if pageid not in scores:
                            scores[pageid] = 0.
                        scores[pageid] += idf[word]
        idxs = np.argsort(np.asarray(scores.values()))[::-1]
        pages = scores.keys()
        if len(idxs) == 0:
            print 'error question:', q

        c = OrderedDict()
        for idx in idxs[:prm.max_candidates]:
            c[pages[idx]] = 0
        candidates.append(c)
        print 'sample ' + str(i) + ' time ' + str(time.time() - st)
        #if i > 10000:
        #    break

    return candidates
def create_index():
    lucene.initVM()
    if os.path.exists(prm.index_folder):
        shutil.rmtree(prm.index_folder)

    indexDir = SimpleFSDirectory(File(prm.index_folder))
    writerConfig = IndexWriterConfig(Version.LUCENE_4_10_1, StandardAnalyzer())
    writer = IndexWriter(indexDir, writerConfig)

    wk = wiki.Wiki(prm.pages_path)

    print "%d docs in index" % writer.numDocs()
    print "Reading files from wikipedia..."

    n = 0
    for l in wk.get_text_iter():
        doc = Document()
        doc.add(Field("text", l, Field.Store.YES, Field.Index.ANALYZED))
        doc.add(Field("id", str(n), Field.Store.YES, Field.Index.ANALYZED))
        writer.addDocument(doc)
        n += 1
        if n % 100000 == 0:
            print 'indexing article', n

    print "Indexed %d docs from wikipedia (%d docs in index)" % (
        n, writer.numDocs())
    print "Closing index of %d docs..." % writer.numDocs()
    writer.close()
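# A matching read path for the index built above might look like the sketch
# below. It is an assumption, not part of the original script: it reopens the
# same prm.index_folder, parses a free-text query against the "text" field,
# and maps hits back to the stored "id" field.
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.util import Version
from java.io import File


def search_index(query_text, n_hits=10):
    reader = DirectoryReader.open(SimpleFSDirectory(File(prm.index_folder)))
    searcher = IndexSearcher(reader)
    query = QueryParser(Version.LUCENE_4_10_1, "text",
                        StandardAnalyzer()).parse(query_text)
    hits = searcher.search(query, n_hits).scoreDocs
    return [searcher.doc(hit.doc).get("id") for hit in hits]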
def wiki_():
    try:
        wiki_ = wiki.Wiki()
        wiki_results = wiki_.get_wiki(dname)
        p = wiki_results["query"]["pages"]
        q = list(p.keys())
        print(q)
        return json.dumps({"desc": p[q[0]]["extract"]})
    except Exception:
        return json.dumps({"desc": "Wiki not found about " + dname})
async def on_ready():
    # Marks bot as running
    await bot.change_presence(
        activity=discord.Game('Reading your timing reports'))
    logging.info('Connected to bot: {}'.format(bot.user.name))
    logging.info('Bot ID: {}'.format(bot.user.id))
    logging.info('Bot fully loaded')
    logging.info('Original creators: https://github.com/Pemigrade/botflop')
    global Wiki
    Wiki = wikilib.Wiki(0)
def apertium_wiki(phenny, origterm, to_nick=None):
    term, section = wiki.parse_term(origterm)

    w = wiki.Wiki(endpoints, None)
    match = w.search(term)

    if not match:
        phenny.say('Can\'t find anything in the Apertium Wiki for "{0}".'.format(term))
        return

    snippet, url = wiki.extract_snippet(match, section)

    if to_nick:
        phenny.say(truncate(snippet, to_nick + ', "{}" - ' + url))
    else:
        phenny.say(truncate(snippet, '"{}" - ' + url))
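# Several of the handlers in this file lean on wiki.parse_term and
# wiki.extract_snippet from the shared wiki module; neither is reproduced
# here. The sketch below is only an assumption of their rough shape, inferred
# from how they are called: parse_term splits an optional "#Section" suffix
# off the query, and extract_snippet turns a search match into a
# (snippet, url) pair.
def parse_term(origterm):
    """Split 'Title#Section' into ('Title', section); section may be None."""
    if '#' in origterm:
        term, section = origterm.split('#', 1)
        return term.strip(), section.strip()
    return origterm.strip(), None


def extract_snippet(match, section=None):
    """Hypothetical: assumes a match is a (snippet_text, url) pair."""
    snippet, url = match
    if section:
        url += '#' + section.replace(' ', '_')
    return snippet, url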
def __init__(self):
    practice_file1.SpeechBot.__init__(self, "dummy text")
    self.w = wiki.Wiki()
    self.nouns = self.get_nouns()
    self.adjectives = self.get_adjectives()
    self.verbs = self.get_verbs()
    self.adverbs = self.get_adverbs()
    self.pronouns = self.get_pronouns()
    self.personal_pronouns = self.get_personal_pronouns()
    self.converter = {
        "nouns": self.nouns,
        "adjectives": self.adjectives,
        "verbs": self.verbs,
        "pronouns": self.pronouns,
        "adverbs": self.adverbs,
        "personal_pronouns": self.personal_pronouns
    }
    self.parse_sentence()
def __init__(self, user_input):
    self.last_response = user_input
    self.stop_words = ["a", "the", "am", "an", "of", "as", "be"]
    self.total_stop_words = list(
        itertools.chain.from_iterable(
            list(csv.reader(open("stop-word-list.csv")))))
    self.nouns = self.get_nouns()
    self.adjectives = self.get_adjectives()
    self.verbs = self.get_verbs()
    self.pronouns = self.get_pronouns()
    self.adverbs = self.get_adverbs()
    self.personal_pronouns = self.get_personal_pronouns()
    #print "personal_pronouns", self.personal_pronouns
    self.query_words = [
        "what", "when", "where", "which", "who", "whom", "whose", "why", "did"
    ]
    self.w = wiki.Wiki()
def vtluug(phenny, input):
    """.vtluug <term> - Look up something on the VTLUUG wiki."""
    origterm = input.groups()[1]
    if not origterm:
        return phenny.say('Perhaps you meant ".vtluug VT-Wireless"?')

    term, section = wiki.parse_term(origterm)

    w = wiki.Wiki(endpoints)
    match = w.search(term)

    if not match:
        phenny.say('Can\'t find anything in the VTLUUG Wiki for "{0}".'.format(term))
        return

    snippet, url = wiki.extract_snippet(match, section)
    phenny.say('"{0}" - {1}'.format(snippet, url))
def awik(phenny, input): """.awik <term> - Look up something on the ArchWiki.""" origterm = input.group(1) if not origterm: return phenny.say('Perhaps you meant ".awik dwm"?') term, section = wiki.parse_term(origterm) w = wiki.Wiki(endpoints) match = w.search(term) if not match: phenny.say( 'Can\'t find anything in the ArchWiki for "{0}".'.format(term)) return snippet, url = wiki.extract_snippet(match, section) phenny.say('"{0}" - {1}'.format(snippet, url))
def push():
    print 'Pushing as %s' % conf['username']
    w = wiki.Wiki(conf['url'], conf['username'], conf['password'])

    for (new_file, d) in diff.diff():
        m = DIFF_FILE_RE.match(d[0])
        if not m:
            print 'Failed to parse diff for %s' % d[0]
            continue

        tempdir = tempfile.mkdtemp()
        title = m.group(1)
        new_file_msg = ''
        if new_file:
            new_file_msg = ', page is new'

        print
        print ('Edited file: %s (resolving in %s%s)'
               % (title, tempdir, new_file_msg))

        try:
            # Write the current wiki text (empty for new pages) and the patch
            # into a scratch directory, then apply the patch there.
            with open(os.path.join(tempdir, title), 'w') as f:
                if not new_file:
                    f.write(w.get_page(title).encode('ascii', 'replace'))
            with open(os.path.join(tempdir, '.patch'), 'w') as f:
                f.write(''.join(d))

            cmd = 'cd %s; patch < .patch' % tempdir
            (out, exit) = utils.execute(cmd)
            if exit != 0:
                # The tempdir is left in place so the failed patch can be
                # resolved by hand.
                print ' %s' % ' '.join(out)
                continue

            with open(os.path.join(tempdir, title), 'r') as f:
                data = f.read()
            w.post_page(title.replace('!slash!', '/'), data, minor=False,
                        bot=False)
        finally:
            pass

        shutil.rmtree(tempdir)
def wik(phenny, input): """.wik <term> - Look up something on Wikipedia.""" origterm = input.groups()[1] if not origterm: return phenny.say('Perhaps you meant ".wik Zen"?') origterm = origterm.strip() term, section = wiki.parse_term(origterm) w = wiki.Wiki(endpoints) match = w.search(term) if not match: phenny.say( 'Can\'t find anything in Wikipedia for "{0}".'.format(origterm)) return snippet, url = wiki.extract_snippet(match, section) phenny.say('"{0}" - {1}'.format(snippet, url))
def get_candidates(qatp):
    wk = wiki.Wiki(prm.pages_path)
    titles_pos = wk.get_titles_pos()

    candidates = []
    n = 0
    for q, a, t, p in qatp:
        if n % 100 == 0:
            print 'finding candidates sample', n
        n += 1

        c = []
        # Use Google restricted to wikipedia.org to find candidate pages,
        # then map each hit's title back to its position in the dump.
        for page in google.search(q.lower() + ' site:wikipedia.org',
                                  num=prm.max_candidates,
                                  stop=prm.max_candidates, pause=45):
            title = page.replace('https://en.wikipedia.org/wiki/', '').replace('_', ' ').lower()
            if title in titles_pos:
                c.append(titles_pos[title])
        candidates.append(c)

    return candidates
def wikipedia(phenny, origterm, lang, to_user=None):
    if not origterm:
        return phenny.say('Perhaps you meant ".wik Zen"?')

    origterm = origterm.strip()
    lang = lang.strip()

    term, section = wiki.parse_term(origterm)

    w = wiki.Wiki(endpoints, lang)
    match = w.search(term)

    if not match:
        phenny.say(
            'Can\'t find anything in Wikipedia for "{0}".'.format(origterm))
        return

    snippet, url = wiki.extract_snippet(match, section)

    if to_user:
        phenny.say(truncate(snippet, to_user + ', "{}" - ' + url))
    else:
        phenny.say(truncate(snippet, '"{}" - ' + url))
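# truncate() above fits the reply into a single IRC message; it is not
# defined in this snippet. A plausible sketch, assuming the second argument
# is a format template with one '{}' placeholder for the snippet and that
# replies should stay under roughly 430 bytes (both assumptions):
def truncate(snippet, template, max_len=430):
    fixed = len(template.format(''))        # everything except the snippet
    room = max_len - fixed
    if len(snippet) > room:
        snippet = snippet[:max(0, room - 3)] + '...'
    return template.format(snippet)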
def awik(phenny, input):
    origterm = input.groups()[1]
    if not origterm:
        return phenny.say('Perhaps you meant ".awik dwm"?')

    term = web.unquote(origterm)
    term = term[0].upper() + term[1:]
    term = term.replace(' ', '_')

    w = wiki.Wiki(wikiapi, wikiuri, wikisearch)

    try:
        result = w.search(term)
    except web.ConnectionError:
        error = "Can't connect to wiki.archlinux.org ({0})".format(
            wikiuri.format(term))
        return phenny.say(error)

    if result is not None:
        phenny.say(result)
    else:
        phenny.say(
            'Can\'t find anything in the ArchWiki for "{0}".'.format(origterm))
async def reloadw(ctx):
    await reload_modules(ctx, "wiki.py")
    global Wiki
    Wiki = wikilib.Wiki(0)
#!/usr/bin/python

# Download a local copy of the wiki

import json
import os

import wiki

with open(os.path.expanduser('~/.mediawiki'), 'r') as f:
    conf = json.loads(f.read())[os.environ['USER']]

if __name__ == '__main__':
    w = wiki.Wiki(conf['url'], conf['username'], conf['password'])

    if not os.path.exists('.mediawiki'):
        os.makedirs('.mediawiki')

    for title in w.all_pages():
        print title
        data = w.get_page(title).encode('ascii', 'replace')
        title = title.replace('/', '!slash!')
        with open(title, 'w') as f:
            f.write(data)
        with open(os.path.join('.mediawiki', title), 'w') as f:
            f.write(data)
def find_info(site):
    exclude = []  # pages to exclude

    # create a Wiki object
    site = wiki.Wiki(site)
    #params = {'action': 'query', 'title': 'Main Page', "prop": "revisions", "format": "xml", "rvprop": "content"}
    params = {'action': 'query', 'list': 'allpages', 'apprefix': 'Pledges'}
    request = api.APIRequest(site, params)
    result = request.query()

    pledges = []
    for i in result["query"]["allpages"]:
        print(i["title"])
        pledges.append(i["title"])

    parsed_data = []
    for pledgename in pledges:
        params = {'action': 'query', 'rvprop': 'content',
                  'prop': 'revisions|categories', "format": "xml",
                  "titles": "%s" % pledgename}
        request = api.APIRequest(site, params)
        result = request.query()
        result = result["query"]["pages"].values()
        print(pledgename)
        try:
            if result[0]["categories"][0]["title"].lower() == "category:completedpledge":
                print("Pledge Completed")
            elif result[0]["categories"][0]["title"].lower() == "category:pledge":
                print("Pledge not completed.")
            result = result[0]["revisions"][0]["*"]
            result = result.encode("utf-8")
            lines = result.split("\n")
            parsed = parse_pledges(lines)
            parsed["title"] = pledgename
            parsed_data.append(parsed)
        except KeyError:
            print("Not a pledge")

    report = open("report.txt", "w")
    for i in parsed_data:
        print(i["title"], i["total"], i["target"], i["paid"])
        i["pledgers"] = map(pledge, i["pledgers"])
        report.write("\n" + i["title"] + "\n")
        if type(i["target"]) == str:
            pass
        else:
            if i["total"] >= i["target"]:
                report.write("Pledge met!\n")
                report.write("Current Non-Payers:\n")
                for pledger in i["pledgers"]:
                    if pledger.paid == False:
                        print(str(pledger.amount))
                        print(pledger.name)
                        out_string = pledger.name + u" £" + str(pledger.amount) + "\n"
                        report.write(out_string.encode("utf-8"))
            else:
                report.write("Pledge not met. \nAmount left until total: " +
                             str(i["target"] - i["total"]) + "\n")
                report.write("Current Pledgers:\n")
                for pledger in i["pledgers"]:
                    print(str(pledger.amount))
                    print(pledger.name)
                    out_string = pledger.name + u" £" + str(pledger.amount) + "\n"
                    report.write(out_string.encode("utf-8"))
    report.close()
    if r in titles_pos:
        return r
    else:
        r = r.replace('(', '').replace(')', '')
        if r in titles_pos:
            return r
        else:
            r = re.sub('\(.*\)', '', a.lower()).strip()
            if r in titles_pos:
                return r
    return ''


print 'Loading data...'
wk = wiki.Wiki(prm.pages_path)
titles_pos = wk.get_titles_pos()

print 'Creating child-parent dictionary...'
G = nx.DiGraph()
for i, (title, idd) in enumerate(titles_pos.items()):
    links = wk.get_article_links(idd)
    for link in links:
        G.add_edge(idd, link, weight=1.)
    if i % prm.dispFreq == 0:
        print 'page', i

print 'Finding paths to answers...'
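# The "Finding paths to answers..." step is cut off above. Assuming it walks
# the article link graph G from a question's page to an answer's page, the
# lookup could use NetworkX's built-in BFS shortest path; find_path and its
# arguments are illustrative names, not the original script's.
def find_path(G, source_id, target_id):
    try:
        return nx.shortest_path(G, source=source_id, target=target_id)
    except nx.NetworkXNoPath:
        return None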
import json
import os
import random
import string
import sys

from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import wiki

with open(os.path.expanduser('~/.eventbrite2mediawiki_%s' % sys.argv[1]),
          'r') as f:
    conf = json.loads(f.read())

base_url = ('https://www.eventbriteapi.com/v3/events/%s'
            % conf['eventbrite']['event_id'])
password_chars = string.ascii_letters + string.digits + '!@#$%^&*()'
username_chars = string.ascii_letters + string.digits + '-'

random.seed(os.urandom(1024))

w = wiki.Wiki(conf['mediawiki']['url'], conf['mediawiki']['username'],
              conf['mediawiki']['password'])


def send_email(email, username, password):
    body = conf['email']['body'] % {'username': username,
                                    'password': password}

    print '-' * 40
    print body
    print '-' * 40

    msg = MIMEMultipart()
    msg['Subject'] = conf['email']['subject']
    msg['From'] = conf['email']['from']
    msg['To'] = email
    msg.preamble = body
def Command(self, user, channel, verb, line):
    """Execute a given verb with these arguments

    Takes the verb which the user entered, and the remainder of the line.
    Returns a string which is sent to the user.
    """
    if verb == 'ppp':
        with open(os.path.expanduser('~/.mediawiki'), 'r') as f:
            wikiconf = json.loads(f.read())['ircbot']
        w = wiki.Wiki(wikiconf['url'], wikiconf['username'],
                      wikiconf['password'])

        # Reports are filed against the next Tuesday.
        tuesday = datetime.datetime.now()
        while tuesday.weekday() != 1:
            tuesday += datetime.timedelta(days=1)

        # Make sure we have an index entry
        day_name = tuesday.strftime('%d %B %Y')
        found = False
        text = w.get_page('PPP reports index').split('\n')
        for index_line in text:
            if index_line.startswith('* %s' % day_name):
                found = True
                break

        if not found:
            entry = '* %s: ' % day_name
            for team_user in self.conf['ppp']['users']:
                entry += ('[[%s PPP report %04d%02d%02d|%s]] '
                          % (team_user, tuesday.year, tuesday.month,
                             tuesday.day, team_user))
            entry += ('\'\'\'[[Final Combined PPP report %04d%02d%02d|Combined]]\'\'\''
                      % (tuesday.year, tuesday.month, tuesday.day))
            text.insert(0, entry)
            w.post_page('PPP reports index', '\n'.join(text))

        # Now the entry
        elems = line.split(' ')
        section = elems[0]

        if elems[-1].startswith('[') and elems[-1].endswith(']'):
            user = elems[-1][1:-1]
            line = ' '.join(elems[1:-1])
        else:
            line = ' '.join(elems[1:])
            user = user.rstrip('_')
        user = self.conf['ppp']['usermap'].get(user, user)

        title = '%s PPP report %04d%02d%02d' % (user, tuesday.year,
                                                tuesday.month, tuesday.day)
        self.log('Adding PPP entry for %s' % title)

        ppp_line = '* %s' % line
        self.log('... section %s' % section)
        self.log(' entry "%s"' % line)

        text = w.get_page(title).split('\n')

        # This is a bit horrible. There is no support in the mediawiki api for
        # grabbing just one section, so we have to grab the entire page and
        # then parse it into its headings. However, we can assume that there
        # are only three headings on a PPP page.
        parsed = {'Progress': [],
                  'Plans': [],
                  'Problems': [],
                  'Unknown': []}
        page_section = 'Unknown'
        for page_line in text:
            m = WIKI_SECTION_RE.match(page_line)
            if m:
                page_section = m.group(1)
            elif page_line:
                parsed[page_section].append(page_line)

        text = []
        for report_section in ['Progress', 'Plans', 'Problems']:
            text.append('== %s ==' % report_section)
            for page_line in parsed[report_section]:
                text.append(page_line)

            self.log(' %s vs %s' % (report_section.lower(), section.lower()))
            if report_section.lower() == section.lower():
                if ppp_line not in text:
                    self.log(' adding the new ppp line')
                    text.append(ppp_line)
            text.append('')

        for page_line in text:
            self.log('*** %s' % page_line)
        w.post_page(title, '\n'.join(text))

        yield (channel, 'msg', 'PPP entry added to %s' % title)
    yield