Example 1
def edit_post(self, postid, body, reason=''):
      if not self.is_logged_in():
           raise ValueError("Failure: can't edit post before logging in")
      postid = str(postid)
      page = blogotubes('http://www.mersenneforum.org/editpost.php?do=editpost&p='+postid)
      if username not in page: # Verify cookies installed properly
           raise ValueError("Failure: tried to edit post {} but not logged in!".format(postid))
      stoken, phash, ptime = self.parse_tokens(page)
      data = self.fill_form(body, postid, stoken, phash, ptime, reason)
      page = blogotubes('http://www.mersenneforum.org/editpost.php?do=updatepost&p='+postid, data=data)
      # Ignore response until I know what to check for
      return page
Example 2
def parse_text_file(reservee, url):
      global email_msg
      old = {seq.seq for seq in db.values() if seq.res == reservee}
      txt = blogotubes(url)
      current = set()
      for line in txt.splitlines():
           if re.match(r'(?<![0-9])[0-9]{5,6}(?![0-9])', line):
                seq = int(line)
                if seq in current:
                     string = "Duplicate sequence? {} {}".format(seq, url)
                     Print(string)
                     email_msg += string+'\n'
                else:
                     current.add(seq)
           elif not re.match(r'^[0-9]+$', line):
                string = "Unknown line from {}: {}".format(url, line)
                Print(string)
                email_msg += string+'\n'
      # easy peasy lemon squeezy
      done = old - current
      new = current - old
      if done or new:
           spider_msg.append('{}: Add {}, Drop {}'.format(reservee, len(new), len(done)))
           drop_db(db, reservee, done)
           add_db(db, reservee, new)
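
The add/drop decision at the end is plain set arithmetic on the old and current reservation sets. A minimal standalone illustration (the sequence numbers below are made up):

old = {276276, 552552, 660660}      # sequences the DB currently credits to this reservee (hypothetical)
current = {552552, 660660, 966966}  # sequences listed in the text file right now (hypothetical)
done = old - current                # {276276}: reserved before but gone from the file -> drop_db
new = current - old                 # {966966}: newly listed in the file -> add_db
print(done, new)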
Example 3
def get_reservations(pid):
     # Copied from allseq.py
     page = blogotubes(reservation_page + '?p='+str(pid))
     # Isolate the [code] block with the reservations
     page = re.search(r'<pre.*?>(.*?)</pre>', page, flags=re.DOTALL).group(1)
     ind = page.find('\n')
     if ind == -1: # No newline means only "<b>Seq Who Index Size</b>", i.e. empty, so no reservations
          return ""
     else:
          return page[ind+1:] # Dump the first line == "<b>Seq Who Index Size</b>"
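
The caller still has to split the returned block into per-sequence records. A minimal sketch, assuming each remaining line carries the four whitespace-separated fields named in the stripped header (Seq, Who, Index, Size) and that reservee names contain no spaces; parse_reservation_lines is a hypothetical helper, not part of the project:

def parse_reservation_lines(text):
    # Assumed line shape: "<seq> <who> <index> <size>"
    out = {}
    for line in text.splitlines():
        fields = line.split()
        if len(fields) != 4:
            continue  # skip blank or unexpected lines
        seq, who, index, size = fields
        out[int(seq)] = (who, int(index), int(size))
    return out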
Example 4
def login(self):
      data = {'vb_login_username': username, 'vb_login_password': passwd}
      data['s'] = ''
      data['securitytoken'] = 'guest'
      data['do'] = 'login'
      data['vb_login_md5password'] = ''
      data['vb_login_md5password_utf'] = ''
      data['cookieuser'] = '******'
      page = blogotubes('http://www.mersenneforum.org/login.php?do=login', data=data)
      return username in page
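
The dictionary mirrors the standard vBulletin login form, and success is detected simply by the username showing up in the response page. A minimal usage sketch, assuming login() and edit_post() from Example 1 live on the same forum-session object; the class name and post id below are made up:

forum = MersenneForumSession()   # hypothetical wrapper class holding login()/edit_post()
if not forum.login():
    raise ValueError("Login failed; check username/passwd")
forum.edit_post(123456, "Updated reservation table", reason="automated edit")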
Example 5
def get_data():
    global data_file
    if "http" in data_file:
        print("Getting the current data")
        txt = blogotubes(data_file)
        if txt is None:
            raise ValueError("Couldn't get data file")
        else:
            data_file = "AllSeq.json"
            with open(data_file, "w") as f:
                f.write(txt)
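
After this runs, the global data_file has been rewritten to point at the local copy. A minimal sketch of reading it back, assuming AllSeq.json contains ordinary JSON (its exact structure isn't shown here):

import json

get_data()                   # downloads the remote file and rewrites data_file to "AllSeq.json"
with open(data_file) as f:
    all_seq = json.load(f)   # structure of the decoded data is an assumption, not shown above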
Example 6
def get_id_info(id):
    base = "http://factordb.com/index.php?id="
    page = blogotubes(base + str(id))
    if not page:  # or 'FF' in page:
        raise ValueError("http error")
    smalls = smallfact.findall(page)
    larges = largefact.findall(page)
    comps = composite.findall(page)
    # print(compid, "\n{}\n##########################################\n\n{}".format(smalls, page))
    # apply map(get_num, ...) to the first entry of the tuples, then concatenate the result with the second entry
    larges = [num + exp for num, exp in zip(map(get_num, (l[0] for l in larges)), (l[1] for l in larges))]
    comps = {
        int(num): (int(exp[1:]) if exp else 1)
        for num, exp in zip(map(get_num, (c[0] for c in comps)), (c[1] for c in comps))
    }
    # comp = get_num(compid)
    return nt.Factors(" * ".join(smalls + larges)), comps
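
smallfact, largefact, and composite are module-level regexes that aren't shown here; the comprehensions assume their findall results are (id-or-digits, exponent) pairs where the exponent is either empty or of the form '^3'. A self-contained illustration of just that reshaping step, with get_num stubbed out and made-up values:

def fake_get_num(s):    # stand-in for get_num(), which normally fetches the digits from factordb
    return s

larges_raw = [('1000000007', ''), ('1000000009', '^3')]   # hypothetical findall output
comps_raw = [('1000000011', '^5'), ('1000000013', '')]

larges = [num + exp for num, exp in zip(map(fake_get_num, (l[0] for l in larges_raw)),
                                        (l[1] for l in larges_raw))]
comps = {int(num): (int(exp[1:]) if exp else 1)
         for num, exp in zip(map(fake_get_num, (c[0] for c in comps_raw)),
                             (c[1] for c in comps_raw))}
print(larges)   # ['1000000007', '1000000009^3']
print(comps)    # {1000000011: 5, 1000000013: 1}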
Example 7
def spider(last_pid):
     wobsite = 'http://www.mersenneforum.org/showthread.php?t=11588&page='
     backup()
     db = read_db()
     spider_msg = []

     ###############################################################################################
     # First the standalone func that processes mass text file reservations
     def parse_text_file(reservee, url):
          global email_msg
          old = {seq.seq for seq in db.values() if seq.res == reservee}
          txt = blogotubes(url)
          current = set()
          for line in txt.splitlines():
               if re.match(r'(?<![0-9])[0-9]{5,6}(?![0-9])', line):
                    seq = int(line)
                    if seq in current:
                         string = "Duplicate sequence? {} {}".format(seq, url)
                         Print(string)
                         email_msg += string+'\n'
                    else:
                         current.add(seq)
               elif not re.match(r'^[0-9]+$', line):
                    string = "Unknown line from {}: {}".format(url, line)
                    Print(string)
                    email_msg += string+'\n'
          # easy peasy lemon squeezy
          done = old - current
          new = current - old
          if done or new:
               spider_msg.append('{}: Add {}, Drop {}'.format(reservee, len(new), len(done)))
               drop_db(db, reservee, done)
               add_db(db, reservee, new)

     ###############################################################################################
     # This processes the parsed HTML and its add/drop commands, and actually affects the current reservations
     
     def process_msg(pid, name, msg):
          add = []; addkws = ('Reserv', 'reserv', 'Add', 'add', 'Tak', 'tak')
          drop = []; dropkws = ('Unreserv', 'unreserv', 'Drop', 'drop', 'Releas', 'releas')
          for line in msg.splitlines():
               if any(kw in line for kw in dropkws):
                    for s in re.findall(r'(?<![0-9])[0-9]{5,6}(?![0-9])', line): # matches only 5/6 digit numbers
                         drop.append(int(s))
               elif any(kw in line for kw in addkws):
                    for s in re.findall(r'(?<![0-9])[0-9]{5,6}(?![0-9])', line):
                         add.append(int(s))
          la = len(add)
          ld = len(drop)
          if la or ld:
               Print('{}: {} adding {}, dropping {}'.format(pid, name, repr(add), repr(drop)))
               spider_msg.append('{}: Add {}, Drop {}'.format(name, la, ld))
               add_db(db, name, add)
               drop_db(db, name, drop)

     ###############################################################################################
     # Begin the parsers, which convert the various HTML into Python data structures for processing
     # Also reverse stack order
     # For each page of the thread, the parsers return a list of (post_id, author, html-replaced-post_body)
     
     # All of my previous HTML parsing needs have been simple enough that regexes were sufficient,
     # and a proper parser would have been overkill; this, though, is much closer to the border, and if I
     # already knew how to use any parser, I would. But the overhead is too much to start now, so...
     # thankfully there are comments in the HTML that are individually closed; without that,
     # this would be substantially harder and I'd probably resort to a parser.
     def parse_msg(msg):
          # Drop text after the last </div>
          ind = msg.rfind('</div>')
          msg = msg[:ind]
          if msg.count('<div') > 1: # There are quotes in the message
               # drop text before the second to last </div>
               ind = msg.rfind('</div>')
               msg = msg[ind+6:]
          else:
               # drop text after the first tag
               ind = msg.find('>')
               msg = msg[ind+1:]
          return msg.replace('<br />', '').strip()
     
     def parse_post(post):
          name = re.search(r'''alt="(.*?) is o''', post).group(1) # "is offline" or "is online"
          msg = re.search(r'<!-- message -->(.*?)<!-- / message -->', post, re.DOTALL).group(1)
          return name, parse_msg(msg)

     def parse_page(page):
          out = []
          posts = re.findall(r'<!-- post #([0-9]{6,7}) -->(.*?)<!-- / post #\1 -->', page, re.DOTALL)
          for post in posts:
               #name, msg = parse_post(post[1])
               out.append(  (int(post[0]),) + parse_post(post[1])  )
          return out

     #################################################################################################
     # End parsers, first one tiny helper function

     def order_posts(posts):
          if posts != sorted(posts, key=lambda post: post[0]):
               raise ValueError("Out of order posts! Pids:\n{}".format([post[0] for post in posts]))
          return posts[0][0]

     #################################################################################################
     # Now begin actual logic of top-level spider()

     html = blogotubes(wobsite+'10000') # vBulletin rounds to last page
     all_pages = [parse_page(html)]
     lowest_pid = order_posts(all_pages[0])
     if not last_pid: # If this is the first time running the script
          last_pid = lowest_pid # On first time run, ignore all but the last page
     while lowest_pid > last_pid: # It's probable that we missed some posts on previous page
          page_num = re.search('<td class="vbmenu_control" style="font-weight:normal">Page ([0-9]+)', html).group(1)
          page_num = str(int(page_num)-1)
          Print("Looks like posts were missed, checking page", page_num)
          html = blogotubes(wobsite+page_num)
          all_pages.insert(0, parse_page(html))
          lowest_pid = order_posts(all_pages[0])

     all_posts = [post for page in all_pages for post in page if post[0] > last_pid]
     if all_posts:
          order_posts(all_posts) # Assert order, ignore lowest pid retval
          for post in all_posts:
               process_msg(*post)
          last_pid = all_posts[-1][0] # Highest PID processed
     else:
          Print("No new posts!")

     for reservee, url in txtfiles.items():
          parse_text_file(reservee, url)

     if spider_msg:
          write_db(db)
          update()
          if not use_local_reservations:
               send('Spider: ' + ' | '.join(spider_msg)) # For now, doesn't check if send was successful

     return last_pid
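
spider() returns the highest post id it processed, so the caller is expected to persist that value between runs and pass it back in. A minimal driver sketch; the state file name and polling interval are made up:

import os
from time import sleep

STATE = "last_pid.txt"   # hypothetical state file

if os.path.exists(STATE):
    with open(STATE) as f:
        last_pid = int(f.read())
else:
    last_pid = 0        # falsy -> spider() treats it as a first run and only looks at the last page

while True:
    last_pid = spider(last_pid)          # process any posts newer than last_pid
    with open(STATE, "w") as f:
        f.write(str(last_pid))
    sleep(3600)                          # re-check the thread once an hour (interval is arbitrary)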
Example 8
###############################################################################

import re
from time import time

from _import_hack import add_path_relative_to_script
add_path_relative_to_script('..')
# this should be removed when proper pip installation is supported
from mfaliquot.sequence import Sequence
from mfaliquot.myutils import linecount, Print, strftime, blogotubes, add_cookies, email

# Some slight modifications of the default global variables

if 'http' in info:
     txt = blogotubes(info)
     if txt is None:
          Print("Couldn't get info, no info will be updated")
          info = None
     else:
          info = dir+'/AllSeq.txt'
          with open(info, 'w') as f:
               f.write(txt)

def get_reservations(pid):
     # Copied from allseq.py
     page = blogotubes(reservation_page + '?p='+str(pid))
     # Isolate the [code] block with the reservations
     page = re.search(r'<pre.*?>(.*?)</pre>', page, flags=re.DOTALL).group(1)
     ind = page.find('\n')
     if ind == -1: # No newline means only "<b>Seq Who Index Size</b>", i.e. empty, so no reservations
          return ""
     else:
          return page[ind+1:] # Dump the first line == "<b>Seq Who Index Size</b>"
Example 9
def get_num(id):
    page = blogotubes("http://factordb.com/index.php?showid=" + id)
    num = largedigits.search(page).group(1)
    num = re.sub(r"[^0-9]", "", num)
    return num
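
largedigits is another module-level regex that isn't shown; whatever it captures, the final re.sub keeps only the digits, which guards against markup or separators in the captured text. A tiny illustration with a made-up fragment (factordb's real markup may differ):

import re

raw = "123456789<br />\n987654321"            # hypothetical captured fragment
print(re.sub(r"[^0-9]", "", raw))             # -> "123456789987654321"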