def getReleases(url, max=-1, full=False):
    """Find all releases linked from a given URL.

    url  -- absolute URL, or a path relative to ``baseUrl`` (anything that
            does not start with 'http' is treated as relative).
    max  -- stop after this many releases; values <= 0 mean "no limit".
    full -- when true, call ``load()`` on every release before returning it.

    Returns a list of ``Release`` objects in document order. An empty list
    is returned when the page could not be fetched.
    """
    if url[:4] != 'http':
        url = baseUrl + '/' + url

    doc = URLGetter(url).getsoup()
    if not doc:
        return []

    # Narrow the search to the main content area when its marker is present;
    # otherwise fall back to scanning the whole document.
    marker = doc.find(text=re.compile('MAIN CONTENT'))
    if marker is not None and marker.parent is not None:
        doc = marker.parent

    relp = re.compile(r'/release/\?id=(\d+)')
    rels = []
    for a in doc.findAll('a', href=relp):
        # findAll() selects anchors by *searching* the href, so search here
        # as well: match() would fail on absolute or prefixed links.
        m = relp.search(a.get('href') or '')
        if m is None:
            continue

        rid = int(m.group(1))
        if a.contents:
            r = Release(rid, a.contents[0])
        else:
            r = Release(rid)

        if full:
            r.load()
        rels.append(r)

        if max > 0 and len(rels) >= max:
            break
    return rels
def findGroups(name):
    """Search the site for groups matching ``name``.

    Returns a list of ``(group_id, group_name)`` tuples. When the search
    page contains no ``<ol>`` result list, the page is treated as a direct
    hit on a single group and a one-element list ``[(group_id, name)]`` is
    returned. An empty list is returned when nothing usable is found.
    """
    print('"%s"' % (name,))
    # NOTE(review): ``name`` is interpolated into the query string without
    # URL-quoting; confirm whether URLGetter escapes it.
    doc = URLGetter(baseUrl + '/search/?seinsel=groups&search=%s' % (name,)).getsoup()
    if not doc:
        return []

    try:
        links = doc.ol.findAll('a')
    except AttributeError:
        # No <ol> on the page (doc.ol is None): look for the link that
        # carries the group id next to the vote information instead.
        print('Direct result, figuring out ID')
        direct = re.compile(r'/group/\?id=(\d+).*votes')
        hit = doc.find(href=direct)
        if hit is None:
            return []
        found = direct.search(hit.get('href') or '')
        if found is None:
            return []
        return [(int(found.group(1)), name)]

    id_param = re.compile(r'id=(\d+)')
    groups = []
    for link in links:
        # Look the href up by name rather than assuming it is the first
        # attribute, and skip anchors that carry no numeric id.
        found = id_param.search(link.get('href') or '')
        if found is None:
            continue
        groups.append((int(found.group(1)), link.string))
    return groups
def getGroup(id):
    """Fetch a group page and build a ``Group`` with its listed releases.

    id -- the group id used in the ``/group/?id=`` URL.

    The group name is read from the first ``<font>`` tag of the main content
    area, and the abbreviation from a parenthesised text following it (empty
    string when absent). Every row of the table following the 'Releases'
    text becomes a ``Release``, except rows whose type is 'Crack', which
    are skipped.

    Returns ``Group(id, name, tla, releases)``.
    """
    doc = URLGetter(baseUrl + '/group/?id=%s' % (id,)).getsoup()
    start = doc.find(text=re.compile('MAIN CONTENT')).parent
    heading = start.find('font')
    name = fixhtml(heading.string.strip())

    tla = ''
    tla_hits = re.compile(r'\((.*)\)').findall(heading.next.next)
    if tla_hits:
        tla = tla_hits[0]

    reltab = doc.body.find(text=re.compile('Releases')).findNext('table')

    # Patterns are loop-invariant, so compile them once.
    release_link = re.compile(r'/release/\?')
    download_link = re.compile('/release/download')
    id_param = re.compile(r'id=(\d+)')
    # The type cell may start with a literal, unconverted HTML entity.
    # NOTE(review): the original test compared against a one-character
    # prefix but dropped six characters, which matches len('&nbsp;');
    # confirm against live markup.
    nbsp_entity = '&nbsp;'

    def cell_text(cells, index):
        # Text of the <font> inside cells[index], or None when the row is
        # too short or the cell has no single-string <font> child.
        try:
            return cells[index].font.string
        except (IndexError, AttributeError):
            return None

    releases = []
    for tr in reltab.findAll('tr'):
        rname = '???????'
        rid = -1
        url = ''
        year = '?'
        rtype = '?'

        # First guess from the row's <font> tags; used only when the
        # cell-based lookup below finds nothing.
        try:
            fonts = tr.findAll('font')
            year = fonts[1].string.strip()
            if len(fonts) >= 3:
                rtype = fixhtml(fonts[2].string).strip()
        except Exception:
            pass

        link = tr.find('a', href=release_link)
        if link:
            rname = fixhtml(link.string).strip()
            print(rname)
            found = id_param.search(link.get('href') or '')
            if found is not None:
                rid = int(found.group(1))
            else:
                print("Could not parse ID")

        # Prefer the values from the third and fourth table cells.
        tds = tr.findAll('td')
        cell_year = cell_text(tds, 2)
        if cell_year is not None:
            year = cell_year
        cell_type = cell_text(tds, 3)
        if cell_type is not None:
            rtype = cell_type

        if rtype.startswith(nbsp_entity):
            rtype = rtype[len(nbsp_entity):]
        rtype = rtype.lstrip()
        print(" " + rtype)

        if rtype == 'Crack':
            print("Skipping Crack " + rname)
            continue

        dl = tr.find('a', href=download_link)
        if dl:
            url = (baseUrl + dl.get('href')).strip()
            print(url)

        releases.append(Release(rid, rname, name, rtype, year, [url]))

    return Group(id, name, tla, releases)
def load(self):
    """Scrape this release's page and fill in the instance attributes.

    Fetches ``/release/?id=<self.id>`` and extracts, on a best-effort basis,
    name, group, type, release date, score, download links, party/compo,
    placement and creator. Each piece is looked up independently; a piece
    that cannot be found leaves the corresponding attribute untouched.

    Returns None. When the page cannot be fetched nothing is changed;
    otherwise ``self.loaded`` is set to True and ``self.update()`` is called.
    """
    doc = URLGetter(baseUrl + '/release/?id=%s' % (self.id,)).getsoup()
    if not doc:
        return

    # Search inside the main content area when its marker exists, else the
    # whole document (a missing marker used to raise AttributeError here).
    marker = doc.find(text=re.compile('MAIN CONTENT'))
    if marker is not None and marker.parent is not None:
        start = marker.parent
    else:
        start = doc

    anyword = re.compile(r'\w+')

    # '?' / None mean "not found"; such values are never written to self.
    name = '?'
    group = '?'
    rtype = '?'
    rdate = '?'
    artist = None
    downloads = None
    score = None
    compo = '?'
    party = '?'
    place = '?'

    print("Loading release")

    # Each lookup below is deliberately best-effort: any failure while
    # walking the markup just leaves the defaults in place.
    try:
        name = start.find('font').string
        release_by = start.find(text=re.compile('Release.*by'))
        group = release_by.findNext(text=anyword)
    except Exception:
        pass

    try:
        vote = start.find(text=re.compile('.*votes.*'))
        if vote:
            # Text looks like "7.7/10 (62 votes)"; keep the leading number.
            m = re.match(r'[^\d]*([\d\.]+)', vote)
            score = m.group(1)
            if len(score) == 1:
                score = score + '.0'
    except Exception:
        pass

    try:
        credits_label = start.find(text=re.compile('Credits :'))
        gfx = credits_label.findNext(text=re.compile('Graphics'))
        artist = gfx.findNext('a').string
    except Exception:
        pass

    try:
        rtype = start.find(text=re.compile('Type')).findNext(text=anyword)
        rdate = start.find(text=re.compile('Release.*ate')).findNext(text=anyword)
    except Exception:
        pass

    try:
        dl_table = start.find(text=re.compile('Download')).findNext('table')
        downloads = dl_table.findAll(text=re.compile('tp://'))
    except Exception:
        pass

    try:
        rstart = start.find(text=re.compile('Released At'))
        party = rstart.findNext(text=anyword)
        compo = rtype
    except Exception:
        pass

    try:
        astart = start.find(text=re.compile('Achievements'))
        print(astart)
        where = astart.findNext(text=anyword)
        if where:
            # Expected form "<compo> at <party>"; anything else raises and
            # leaves compo/party as they were.
            compo, party = where.split(' at ')
            print(party)
            following = where.next
            place = following[following.find('#') + 1:]
            print(place)
    except Exception:
        pass

    creator = group
    cstart = start.find(text=re.compile('Credits'))

    def credited(role):
        # First word-bearing text two nodes past the given role label in
        # the credits section, or None when it cannot be located.
        try:
            return cstart.findNext(text=role).next.next.findNext(text=anyword)
        except Exception:
            return None

    if rtype == 'C64 Music':
        creator = credited('Music')
    elif rtype == 'C64 Graphics':
        creator = credited('Graphics')

    print(downloads)

    if not artist:
        # %-formatting instead of '+' so a None group cannot raise here.
        print("ARTIST < %s" % (group,))
        artist = group

    # Only store values that were actually found; None can come back from
    # findNext()/.string and must not be passed on to fixhtml().
    if name and name != '?':
        self.name = fixhtml(name)
    if artist and artist != '?':
        self.artist = fixhtml(artist)
    if group and group != '?':
        self.group = fixhtml(group)
    if rtype and rtype != '?':
        self.type = fixhtml(rtype)
    if rdate and rdate != '?':
        self.date = fixhtml(rdate)
    if downloads:
        self.downloads = downloads
    if score:
        self.score = score
    if compo and compo != '?':
        self.compo = fixhtml(compo)
        print(self.compo)
    if party and party != '?':
        self.party = fixhtml(party)
    if place and place != '?':
        # Accept only a leading number; non-numeric placement text is
        # ignored rather than aborting the whole load with ValueError.
        digits = re.match(r'\s*(\d+)', place)
        if digits is not None:
            self.place = int(digits.group(1))
            self.place2 = "%02d" % self.place
    if creator:
        self.creator = fixhtml(creator)

    self.loaded = True
    self.update()