Example #1
def wik(phenny, input):
    """.wik <term> - Look up something on Wikipedia."""

    origterm = input.groups()[1]
    if not origterm:
        return phenny.say('Perhaps you meant ".wik Zen"?')

    term = web.unquote(origterm)
    term = term[0].upper() + term[1:]
    term = term.replace(' ', '_')

    w = wiki.Wiki(wikiapi, wikiuri, wikisearch)

    try:
        result = w.search(term)
    except web.ConnectionError:
        error = "Can't connect to en.wikipedia.org ({0})".format(
            wikiuri.format(term))
        return phenny.say(error)

    if result is not None:
        phenny.say(result)
    else:
        phenny.say(
            'Can\'t find anything in Wikipedia for "{0}".'.format(origterm))
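Phenny dispatches on attributes attached to the handler function; a hedged sketch of how `wik` above is likely wired (attribute names follow phenny convention, but the exact regex is an assumption):

# A two-group rule would make input.groups()[1] the search term (assumption).
wik.rule = r'\.(wik)\s+(.*)'
wik.example = '.wik Zen'
wik.priority = 'high'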
Example #2
def wikipedia(phenny, input, origterm, lang, to_user=None):
    origterm = origterm.strip()
    lang = lang.strip()

    if not origterm:
        return phenny.say('Perhaps you meant ".wik Zen"?')

    section = None

    if "#" in origterm:
        origterm, section = origterm.split("#")[:2]
        section = format_subsection(section)
    term = format_term(origterm)

    w = wiki.Wiki(wikiapi % lang, wikiuri % lang, wikisearch % lang)

    try:
        result = w.search(term)
    except web.ConnectionError:
        error = "Can't connect to en.wikipedia.org ({0})".format(
            wikiuri.format(term))
        return phenny.say(error)

    if result is not None:
        # result is "snippet|url"; keep the URL and disregard the snippet at [0].
        url = result.split("|")[-1]
        check_posted(phenny, input, url)
        if to_user:
            phenny.say(to_user + ', ' + parse_wiki_page(url, term, section))
        else:
            phenny.say(parse_wiki_page(url, term, section))
    else:
        phenny.say(
            'Can\'t find anything in Wikipedia for "{0}".'.format(origterm))
Example #3
def get_candidates(qatp):

    print 'loading data...'
    idf = pkl.load(open(prm.idf_path, "rb"))
    wk = wiki.Wiki(prm.pages_path)

    print 'creating vocabulary...'
    vocab = {}
    for q, _, _, _ in qatp:
        words = wordpunct_tokenize(q.lower())
        for word in words:
            if word in idf:
                vocab[word] = {}

    print 'creating inverted index...'
    i = 0
    for text in wk.get_text_iter():
        if i % 10000 == 0:
            print 'article', i
        words = wordpunct_tokenize(text.lower())
        for word in words:
            if word in vocab:
                vocab[word][i] = 0

        #if i > 500000:
        #    break
        i += 1

    print 'selecting pages...'
    candidates = []
    for i, [q, _, _, _] in enumerate(qatp):
        st = time.time()
        words = wordpunct_tokenize(q.lower())
        scores = {}

        for word in words:
            if word in vocab:
                if len(vocab[word]) < 100000:
                    for pageid in vocab[word].keys():
                        if pageid not in scores:
                            scores[pageid] = 0.
                        scores[pageid] += idf[word]
        # keys() and values() enumerate the dict in the same order here (no
        # mutation in between), so idxs computed from values() indexes pages.
        idxs = np.argsort(np.asarray(scores.values()))[::-1]

        pages = scores.keys()

        if len(idxs) == 0:
            print 'error question:', q

        c = OrderedDict()
        for idx in idxs[:prm.max_candidates]:
            c[pages[idx]] = 0

        candidates.append(c)
        print 'sample ' + str(i) + ' time ' + str(time.time() - st)

        #if i > 10000:
        #    break

    return candidates
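The candidate selection above boils down to summing each query word's IDF over the pages that contain it. A self-contained sketch of the same scoring on toy data (the `index` and `idf` values here are hypothetical; the real ones come from `prm.pages_path` and `prm.idf_path`):

from collections import OrderedDict

# Toy inverted index: word -> set of page ids containing it.
index = {"zen": {0, 2}, "buddhism": {2, 3}, "linux": {1}}
idf = {"zen": 2.0, "buddhism": 1.5, "linux": 1.0}

def rank(query_words, max_candidates=2):
    scores = {}
    for word in query_words:
        for pageid in index.get(word, ()):
            scores[pageid] = scores.get(pageid, 0.0) + idf[word]
    # Highest accumulated IDF first, keep only the top candidates.
    top = sorted(scores, key=scores.get, reverse=True)[:max_candidates]
    return OrderedDict((pid, 0) for pid in top)

print(rank(["zen", "buddhism"]))  # OrderedDict([(2, 0), (0, 0)])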
Example #4
def create_index():

    lucene.initVM()
    if os.path.exists(prm.index_folder):
        shutil.rmtree(prm.index_folder)

    indexDir = SimpleFSDirectory(File(prm.index_folder))
    writerConfig = IndexWriterConfig(Version.LUCENE_4_10_1, StandardAnalyzer())
    writer = IndexWriter(indexDir, writerConfig)
    wk = wiki.Wiki(prm.pages_path)

    print "%d docs in index" % writer.numDocs()
    print "Reading files from wikipedia..."
    n = 0
    for l in wk.get_text_iter():
        doc = Document()
        doc.add(Field("text", l, Field.Store.YES, Field.Index.ANALYZED))
        doc.add(Field("id", str(n), Field.Store.YES, Field.Index.ANALYZED))
        writer.addDocument(doc)
        n += 1
        if n % 100000 == 0:
            print 'indexing article', n
    print "Indexed %d docs from wikipedia (%d docs in index)" % (
        n, writer.numDocs())
    print "Closing index of %d docs..." % writer.numDocs()
    writer.close()
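A matching query sketch for an index built this way, assuming the same PyLucene 4.x bindings as `create_index` (class and method names follow the Lucene 4 Java API; the query string is a placeholder):

from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.search import IndexSearcher

# Assumes lucene.initVM() has already run, as in create_index().
reader = DirectoryReader.open(SimpleFSDirectory(File(prm.index_folder)))
searcher = IndexSearcher(reader)
parser = QueryParser(Version.LUCENE_4_10_1, "text", StandardAnalyzer())
hits = searcher.search(parser.parse("some query"), 10)
for score_doc in hits.scoreDocs:
    print searcher.doc(score_doc.doc).get("id"), score_doc.score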
Example #5
def wiki_():
    try:
        wiki_ = wiki.Wiki()
        # dname is supplied by the enclosing scope (e.g., a route parameter).
        wiki_results = wiki_.get_wiki(dname)
        p = wiki_results["query"]["pages"]
        q = list(p.keys())
        print(q)
        return json.dumps({"desc": p[q[0]]["extract"]})
    except Exception:
        return json.dumps({"desc": "Wiki not found about " + dname})
Example #6
async def on_ready():
    # Marks bot as running
    await bot.change_presence(
        activity=discord.Game('Reading your timing reports'))
    logging.info('Connected to bot: {}'.format(bot.user.name))
    logging.info('Bot ID: {}'.format(bot.user.id))
    logging.info('Bot fully loaded')
    logging.info('Original creators: https://github.com/Pemigrade/botflop')
    global Wiki
    Wiki = wikilib.Wiki(0)
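For context, a minimal sketch of how an `on_ready` handler like this is registered with discord.py (the prefix, intents, and token are placeholders, and `wikilib` is the project's own module):

import logging

import discord
from discord.ext import commands

logging.basicConfig(level=logging.INFO)
bot = commands.Bot(command_prefix='!', intents=discord.Intents.default())

@bot.event
async def on_ready():
    logging.info('Connected to bot: {}'.format(bot.user.name))

bot.run('YOUR_BOT_TOKEN')  # placeholder, not a real token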
Example #7
def apertium_wiki(phenny, origterm, to_nick=None):
    term, section = wiki.parse_term(origterm)

    w = wiki.Wiki(endpoints, None)
    match = w.search(term)

    if not match:
        phenny.say('Can\'t find anything in the Apertium Wiki for "{0}".'.format(term))
        return

    snippet, url = wiki.extract_snippet(match, section)

    if to_nick:
        phenny.say(truncate(snippet, to_nick + ', "{}" - ' + url))
    else:
        phenny.say(truncate(snippet, '"{}" - ' + url))
Example #8
    def __init__(self):
        practice_file1.SpeechBot.__init__(self, "dummy text")

        self.w = wiki.Wiki()
        self.nouns = self.get_nouns()
        self.adjectives = self.get_adjectives()
        self.verbs = self.get_verbs()
        self.adverbs = self.get_adverbs()
        self.pronouns = self.get_pronouns()
        self.personal_pronouns = self.get_personal_pronouns()
        self.converter = {
            "nouns": self.nouns,
            "adjectives": self.adjectives,
            "verbs": self.verbs,
            "pronouns": self.pronouns,
            "adverbs": self.adverbs,
            "personal_pronouns": self.personal_pronouns
        }
        self.parse_sentence()
Example #9
    def __init__(self, user_input):
        self.last_response = user_input
        self.stop_words = ["a", "the", "am", "an", "of", "as", "be"]
        self.total_stop_words = list(
            itertools.chain.from_iterable(
                list(csv.reader(open("stop-word-list.csv")))))
        self.nouns = self.get_nouns()
        self.adjectives = self.get_adjectives()
        self.verbs = self.get_verbs()
        self.pronouns = self.get_pronouns()

        self.adverbs = self.get_adverbs()
        self.personal_pronouns = self.get_personal_pronouns()
        #print "personal_pronouns", self.personal_pronouns
        self.query_words = [
            "what", "when", "where", "which", "who", "whom", "whose", "why",
            "did"
        ]
        self.w = wiki.Wiki()
Example #10
def vtluug(phenny, input): 
    """.vtluug <term> - Look up something on the VTLUUG wiki."""

    origterm = input.groups()[1]
    if not origterm: 
        return phenny.say('Perhaps you meant ".vtluug VT-Wireless"?')

    term, section = wiki.parse_term(origterm)

    w = wiki.Wiki(endpoints)
    match = w.search(term)

    if not match:
        phenny.say('Can\'t find anything in the VTLUUG Wiki for "{0}".'.format(term))
        return

    snippet, url = wiki.extract_snippet(match, section)

    phenny.say('"{0}" - {1}'.format(snippet, url))
Example #11
def awik(phenny, input):
    """.awik <term> - Look up something on the ArchWiki."""

    origterm = input.group(1)
    if not origterm:
        return phenny.say('Perhaps you meant ".awik dwm"?')

    term, section = wiki.parse_term(origterm)

    w = wiki.Wiki(endpoints)
    match = w.search(term)

    if not match:
        phenny.say(
            'Can\'t find anything in the ArchWiki for "{0}".'.format(term))
        return

    snippet, url = wiki.extract_snippet(match, section)

    phenny.say('"{0}" - {1}'.format(snippet, url))
Example #12
def push():
    print 'Pushing as %s' % conf['username']
    w = wiki.Wiki(conf['url'], conf['username'], conf['password'])

    for (new_file, d) in diff.diff():
        m = DIFF_FILE_RE.match(d[0])
        if not m:
            print 'Failed to parse diff for %s' % d[0]
            continue

        tempdir = tempfile.mkdtemp()
        title = m.group(1)

        new_file_msg = ''
        if new_file:
            new_file_msg = ', page is new'

        print
        print ('Edited file: %s (resolving in %s%s)'
               % (title, tempdir, new_file_msg))

        try:
            with open(os.path.join(tempdir, title), 'w') as f:
                if not new_file:
                    f.write(w.get_page(title).encode('ascii', 'replace'))
            with open(os.path.join(tempdir, '.patch'), 'w') as f:
                f.write(''.join(d))
            cmd = 'cd %s; patch < .patch' % tempdir
            (out, exit) = utils.execute(cmd)
            if exit != 0:
                print '    %s' % '    '.join(out)
                continue

            with open(os.path.join(tempdir, title), 'r') as f:
                data = f.read()
            w.post_page(title.replace('!slash!', '/'), data, minor=False,
                        bot=False)

        finally:
            # Clean up the temp dir even when the patch fails and we continue.
            shutil.rmtree(tempdir)
Example #13
def wik(phenny, input):
    """.wik <term> - Look up something on Wikipedia."""

    origterm = input.groups()[1]
    if not origterm:
        return phenny.say('Perhaps you meant ".wik Zen"?')

    origterm = origterm.strip()
    term, section = wiki.parse_term(origterm)

    w = wiki.Wiki(endpoints)
    match = w.search(term)

    if not match:
        phenny.say(
            'Can\'t find anything in Wikipedia for "{0}".'.format(origterm))
        return

    snippet, url = wiki.extract_snippet(match, section)

    phenny.say('"{0}" - {1}'.format(snippet, url))
Example #14
def get_candidates(qatp):

    wk = wiki.Wiki(prm.pages_path)
    titles_pos = wk.get_titles_pos()

    candidates = []
    n = 0
    for q, a, t, p in qatp:
        if n % 100 == 0:
            print 'finding candidates sample', n
        n += 1

        c = []

        for page in google.search(q.lower() + ' site:wikipedia.org',
                                  num=prm.max_candidates,
                                  stop=prm.max_candidates, pause=45):
            title = page.replace('https://en.wikipedia.org/wiki/',
                                 '').replace('_', ' ').lower()
            if title in titles_pos:
                # titles_pos maps title -> position; index it rather than call it.
                c.append(titles_pos[title])

        candidates.append(c)
        
    return candidates
Example #15
def wikipedia(phenny, origterm, lang, to_user=None):
    if not origterm:
        return phenny.say('Perhaps you meant ".wik Zen"?')

    origterm = origterm.strip()
    lang = lang.strip()

    term, section = wiki.parse_term(origterm)

    w = wiki.Wiki(endpoints, lang)
    match = w.search(term)

    if not match:
        phenny.say(
            'Can\'t find anything in Wikipedia for "{0}".'.format(origterm))
        return

    snippet, url = wiki.extract_snippet(match, section)

    if to_user:
        phenny.say(truncate(snippet, to_user + ', "{}" - ' + url))
    else:
        phenny.say(truncate(snippet, '"{}" - ' + url))
Example #16
def awik(phenny, input):
    origterm = input.groups()[1]
    if not origterm:
        return phenny.say('Perhaps you meant ".awik dwm"?')

    term = web.unquote(origterm)
    term = term[0].upper() + term[1:]
    term = term.replace(' ', '_')

    w = wiki.Wiki(wikiapi, wikiuri, wikisearch)

    try:
        result = w.search(term)
    except web.ConnectionError:
        error = "Can't connect to wiki.archlinux.org ({0})".format(
            wikiuri.format(term))
        return phenny.say(error)

    if result is not None:
        phenny.say(result)
    else:
        phenny.say(
            'Can\'t find anything in the ArchWiki for "{0}".'.format(origterm))
Example #17
async def reloadw(ctx):
    await reload_modules(ctx, "wiki.py")
    global Wiki
    Wiki = wikilib.Wiki(0)
Example #18
#!/usr/bin/python

# Download a local copy of the wiki

import json
import os
import wiki


with open(os.path.expanduser('~/.mediawiki'), 'r') as f:
    conf = json.loads(f.read())[os.environ['USER']]


if __name__ == '__main__':
    w = wiki.Wiki(conf['url'], conf['username'], conf['password'])

    if not os.path.exists('.mediawiki'):
        os.makedirs('.mediawiki')

    for title in w.all_pages():
        print title
        data = w.get_page(title).encode('ascii', 'replace')
        title = title.replace('/', '!slash!')
        with open(title, 'w') as f:
            f.write(data)
        with open(os.path.join('.mediawiki', title), 'w') as f:
            f.write(data)
Example #19
def find_info(site):
    exclude = [] # pages to exclude
    # create a Wiki object
    site = wiki.Wiki(site) 
    #params = {'action':'query', 'title':'Main Page',"prop":"revisions","format":"xml","rvprop":"content"}
    params = {'action':'query','list':'allpages','apprefix':'Pledges'}
    request = api.APIRequest(site, params)
    result = request.query()
    pledges = []
    for i in result["query"]["allpages"]:
        print(i["title"])
        pledges.append(i["title"])

    parsed_data = []
   
    for pledgename in pledges:
        params = {'action': 'query', 'rvprop': 'content',
                  'prop': 'revisions|categories', 'format': 'xml',
                  'titles': pledgename}
        request = api.APIRequest(site,params)
        result = request.query()
        result = result["query"]["pages"].values()
        print(pledgename)
        try:
            if result[0]["categories"][0]["title"].lower() == "category:completedpledge":
                print("Pledge Completed")
            elif result[0]["categories"][0]["title"].lower() == "category:pledge":
                print("Pledge not completed.")
                result = result[0]["revisions"][0]["*"]
                result = result.encode("utf-8")
                lines = result.split("\n")
                
                parsed = parse_pledges(lines)
                parsed["title"] = pledgename
                parsed_data.append(parsed)
                
        except KeyError:
            print("Not a pledge")

    
    report = open("report.txt", "w")
    for i in parsed_data:
        print(i["title"],i["total"],i["target"],i["paid"])
        i["pledgers"] = map(pledge, i["pledgers"])
        report.write("\n"+i["title"]+"\n")
        if type(i["target"]) == str:
            pass
        else:
            if i["total"] >= i["target"]:
                report.write("Pledge met!\n")
                report.write("Current Non-Payers:\n")
                for pledger in i["pledgers"]:
                    if pledger.paid == False:
                        print(str(pledger.amount))
                        print(pledger.name)
                        out_string = pledger.name + u" £" + str(pledger.amount) + "\n"
                        report.write(out_string.encode("utf-8"))
            else:    
                report.write("Pledge not met. \nAmount left until total: " + str(i["target"]-i["total"]) +"\n")
                report.write("Current Pledgers:\n")
                for pledger in i["pledgers"]:
                    print(str(pledger.amount))
                    print(pledger.name)
                    out_string = pledger.name + u" £" + str(pledger.amount) + "\n"
                    report.write(out_string.encode("utf-8"))

    report.close()
Example #20
            if r in titles_pos:
                return r
            else:
                r = r.replace('(', '').replace(')', '')
                if r in titles_pos:
                    return r
                else:
                    r = re.sub(r'\(.*\)', '', a.lower()).strip()
                    if r in titles_pos:
                        return r
    return ''


print 'Loading data...'

wk = wiki.Wiki(prm.pages_path)
titles_pos = wk.get_titles_pos()

print 'Creating child-parent dictionary...'
G = nx.DiGraph()

for i, (title, idd) in enumerate(titles_pos.items()):

    links = wk.get_article_links(idd)
    for link in links:
        G.add_edge(idd, link, weight=1.)

    if i % prm.dispFreq == 0:
        print 'page', i

print 'Finding paths to answers...'
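Once the link graph `G` is populated, the path search announced by the last print would typically lean on networkx's built-ins; a sketch with hypothetical `source_id` and `answer_id` page ids:

# Shortest chain of wiki links from a question's page to the answer's page.
try:
    path = nx.shortest_path(G, source=source_id, target=answer_id)
    print 'path:', path
except nx.NetworkXNoPath:
    print 'no path between', source_id, 'and', answer_id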
Example #21
import json
import os
import random
import string
import sys
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import wiki

with open(os.path.expanduser('~/.eventbrite2mediawiki_%s' % sys.argv[1]),
          'r') as f:
    conf = json.loads(f.read())

base_url = ('https://www.eventbriteapi.com/v3/events/%s' %
            conf['eventbrite']['event_id'])
password_chars = string.ascii_letters + string.digits + '!@#$%^&*()'
username_chars = string.ascii_letters + string.digits + '-'

random.seed(os.urandom(1024))

w = wiki.Wiki(conf['mediawiki']['url'], conf['mediawiki']['username'],
              conf['mediawiki']['password'])


def send_email(email, username, password):
    body = conf['email']['body'] % {'username': username, 'password': password}

    print '-' * 40
    print body
    print '-' * 40

    msg = MIMEMultipart()
    msg['Subject'] = conf['email']['subject']
    msg['From'] = conf['email']['from']
    msg['To'] = email
    msg.preamble = body
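The snippet is cut off before the message is actually sent; a plausible completion with the standard library (the localhost relay is an assumption, not this project's configuration):

import smtplib

def deliver(msg, recipient):
    # Assumption: an SMTP relay listens on localhost; conf may specify otherwise.
    server = smtplib.SMTP('localhost')
    server.sendmail(msg['From'], [recipient], msg.as_string())
    server.quit()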
Example #22
    def Command(self, user, channel, verb, line):
        """Execute a given verb with these arguments

        Takes the verb which the user entered, and the remainder of the line.
        Returns a string which is sent to the user.
        """
        if verb == 'ppp':
            with open(os.path.expanduser('~/.mediawiki'), 'r') as f:
                wikiconf = json.loads(f.read())['ircbot']
            w = wiki.Wiki(wikiconf['url'], wikiconf['username'],
                          wikiconf['password'])

            tuesday = datetime.datetime.now()
            while tuesday.weekday() != 1:
                tuesday += datetime.timedelta(days=1)

            # Make sure we have an index entry
            day_name = tuesday.strftime('%d %B %Y')
            found = False
            text = w.get_page('PPP reports index').split('\n')
            for index_line in text:
                if index_line.startswith('* %s' % day_name):
                    found = True
                    break

            if not found:
                entry = '* %s: ' % day_name
                for team_user in self.conf['ppp']['users']:
                    entry += ('[[%s PPP report %04d%02d%02d|%s]] ' %
                              (team_user, tuesday.year, tuesday.month,
                               tuesday.day, team_user))
                entry += (
                    '\'\'\'[[Final Combined PPP report %04d%02d%02d|Combined]]\'\'\''
                    % (tuesday.year, tuesday.month, tuesday.day))
                text.insert(0, entry)
                w.post_page('PPP reports index', '\n'.join(text))

            # Now the entry
            elems = line.split(' ')
            section = elems[0]
            if elems[-1].startswith('[') and elems[-1].endswith(']'):
                user = elems[-1][1:-1]
                line = ' '.join(elems[1:-1])
            else:
                line = ' '.join(elems[1:])

            user = user.rstrip('_')
            user = self.conf['ppp']['usermap'].get(user, user)
            title = '%s PPP report %04d%02d%02d' % (user, tuesday.year,
                                                    tuesday.month, tuesday.day)
            self.log('Adding PPP entry for %s' % title)

            ppp_line = '* %s' % line
            self.log('... section %s' % section)
            self.log('    entry "%s"' % line)

            text = w.get_page(title).split('\n')

            # This is a bit horrible. There is no support in the mediawiki api for
            # grabbing just one section, so we have to grab the entire page and then
            # parse it into its headings. However, we can assume that there are only
            # three headings on a PPP page.
            parsed = {
                'Progress': [],
                'Plans': [],
                'Problems': [],
                'Unknown': []
            }
            page_section = 'Unknown'
            for page_line in text:
                m = WIKI_SECTION_RE.match(page_line)
                if m:
                    page_section = m.group(1)
                elif page_line:
                    parsed[page_section].append(page_line)

            text = []
            for report_section in ['Progress', 'Plans', 'Problems']:
                text.append('== %s ==' % report_section)
                for page_line in parsed[report_section]:
                    text.append(page_line)

                self.log('    %s vs %s' %
                         (report_section.lower(), section.lower()))
                if report_section.lower() == section.lower():
                    if ppp_line not in text:
                        self.log('    adding the new ppp line')
                        text.append(ppp_line)
                text.append('')

            for page_line in text:
                self.log('*** %s' % page_line)

            w.post_page(title, '\n'.join(text))
            yield (channel, 'msg', 'PPP entry added to %s' % title)

        yield
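For reference, `WIKI_SECTION_RE` above needs to match `== Heading ==` lines and expose the heading as group 1; a plausible definition (hypothetical, since the real constant is defined elsewhere in the module):

import re

# Hypothetical: matches MediaWiki level-2 headings such as "== Progress ==".
WIKI_SECTION_RE = re.compile(r'^==\s*(.+?)\s*==$')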