# FreeBSD ports crawler: fetch a repository INDEX and record each package.
def crawl_repo(repo):
    rels = []
    fn = "".join(("files/freebsd/", str(repo.component), "-",
                  str(time.time()), ".txt"))
    url = "".join((CRAWL_DIR, repo.architecture, "/packages-",
                   str(repo.codename), "-", repo.branch, "/INDEX"))
    t = helper.open_url(url, fn, repo.last_crawl)
    if t:
        f = open(fn)
        for line in f:
            # The first |-separated INDEX field is "name-version".
            pkg_ver = line.split("|", 1)
            pkg, ver = pkg_ver[0].rsplit("-", 1)
            if "," in ver:
                ver = ver.rsplit(",", 1)[0]  # drop the ",portepoch" suffix
            if "_" in ver:
                ver, rev = ver.rsplit("_", 1)  # split off "_portrevision"
            else:
                rev = 0
            rel = DownstreamRelease()
            rel.repo_id = repo.id
            rel.package = pkg
            rel.version = ver
            rel.revision = rev
            rel.released = t
            rels.append(rel)
        f.close()
    if t is None:
        t = repo.last_crawl
    return (t, rels)
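# A minimal sketch (illustrative only, not called by the crawler) of the
# version splitting crawl_repo() applies to the first INDEX field; FreeBSD
# package names follow "name-version[_portrevision][,portepoch]":
def _split_freebsd_name(name_ver):
    pkg, ver = name_ver.rsplit("-", 1)
    if "," in ver:
        ver = ver.rsplit(",", 1)[0]   # ",1" is the port epoch
    if "_" in ver:
        ver, rev = ver.rsplit("_", 1)  # "_4" is the port revision
    else:
        rev = 0
    return pkg, ver, rev

# _split_freebsd_name("apache-2.2.11_4,1") -> ("apache", "2.2.11", "4")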
# SourceForge crawler: scrape file names and release dates from the
# project's RSS file index.
def get_files(project_id, last_crawl=None):
    limit = 10
    if last_crawl is None:
        limit = 100
    # NOTE: limit is computed but never used below.
    fn = "files/sourceforge/%d-%s.rss" % (time.time(), project_id)
    try:
        ret = helper.open_url(
            "http://sourceforge.net/api/file/index/project-id/%s/rss"
            % (project_id,), fn)
    except httplib.BadStatusLine:
        print "ERROR bad status"
        return []
    except urllib2.URLError:
        print "ERROR UrlError"
        return []
    if ret is None:
        print " ERROR"
        return []
    pattern_file = re.compile(
        "<link>http://sourceforge.net/projects/.*/(\S*)/download</link>")
    pattern_date = re.compile("<pubDate>(.*) [\+-][0-9]{4}</pubDate>")
    files = []
    fs = []
    for line in open(fn):
        # <link> lines name files; the next <pubDate> line stamps them all.
        tmp_fs = pattern_file.findall(line)
        if len(tmp_fs) > 0:
            fs = tmp_fs
        ds = pattern_date.findall(line)
        if len(ds) > 0:
            d = datetime.datetime.strptime(ds[0], "%a, %d %b %Y %H:%M:%S")
            for f in fs:
                files.append((f, d))
            fs = []
    return files
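# For reference, the two feed fragments the regexes above expect (a hedged
# reconstruction from the patterns themselves; the live feed may differ):
#
#   <link>http://sourceforge.net/projects/foo/files/foo-1.0.tar.gz/download</link>
#   <pubDate>Mon, 05 Jan 2009 12:34:56 +0000</pubDate>
#
# pattern_date strips the timezone offset before strptime, so:
#   datetime.datetime.strptime("Mon, 05 Jan 2009 12:34:56",
#                              "%a, %d %b %Y %H:%M:%S")
#   -> datetime.datetime(2009, 1, 5, 12, 34, 56)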
# MySQL crawler: scan the download archive pages for tarball releases.
def get_releases(last_crawl=None):
    pkgs = []
    for ver in VERSIONS:
        filename = ("files/mysql/list-" + ver + "-" + str(time.time()) +
                    ".html")
        info = helper.open_url(
            "http://downloads.mysql.com/archives.php?p=" + ver + "&o=other",
            filename, last_crawl)
        if info is None:
            return pkgs
        changes = open(filename)
        date = ("(?P<day>[0-9][0-9]?) ((?P<smonth>[A-Z][a-z][a-z])|"
                "(?P<month>[A-Z][a-z]+)) (?P<year>[0-9]{4})")
        version = "(?P<version>[0-9\.\-a-z]+)"
        bracket_stuff = "( \[.*\])?"  # currently unused
        # Version 1.1.3
        line_pattern = re.compile(
            '<a href="/archives/%s/mysql-%s\.tar\.gz"><strong>'
            'mysql-[0-9\.\-a-z]+.tar.gz</strong> \(%s, .*\)</a><br />'
            % (ver, version, date))
        for line in changes:
            line = line.strip("\n")
            m = line_pattern.search(line)
            if m:
                rel = UpstreamRelease()
                rel.package = "mysql"
                m_d = m.groupdict()
                if "version" in m_d:
                    rel.version = m_d["version"]
                if "year" in m_d:
                    rel.released = get_date(m_d)
                if rel.version and rel.released and (
                        last_crawl is None or rel.released > last_crawl):
                    pkgs.append(rel)
    return pkgs
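# Hedged illustration: assuming VERSIONS holds entries such as "mysql-5.0",
# a page line that line_pattern matches would look roughly like
#
#   <a href="/archives/mysql-5.0/mysql-5.0.77.tar.gz"><strong>mysql-5.0.77.tar.gz</strong> (23 Jan 2009, Source)</a><br />
#
# yielding groupdict() values version="5.0.77", day="23", smonth="Jan",
# year="2009" for get_date() to assemble into a release date.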
# PHP crawler: the release index is fetched in PHP-serialized form, one
# request per major version.
def get_releases(last_crawl=None):
    pkgs = []
    for version in range(4, 8):
        fn = "files/helper/php%d.txt" % version
        helper.open_url(
            MIRROR + "/releases/index.php?serialize=1&version=%d&max=3000"
            % version, fn)
        f = open(fn)
        s = f.read()
        f.close()
        d = deserialize(s)
        f = flatten(d)
        if f is None:
            # no filenames found
            continue
        for date, fn in f:  # fn is reused here for the tarball filename
            rel = parsers.parse_filename(fn)
            if rel is not None and "pl" not in rel.version:
                rel.released = date
                pkgs.append(rel)
    return pkgs
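# For context (an assumption about the endpoint, inferred from the
# serialize=1 parameter and the deserialize() call): the index is returned
# as PHP-serialized data, roughly of the shape
#
#   a:1:{s:5:"5.2.9";a:1:{s:8:"filename";s:16:"php-5.2.9.tar.gz";}}
#
# deserialize() and flatten() (defined elsewhere in this codebase) reduce
# that to (release_date, filename) pairs for the loop above.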
def get_files(project_id, paths=["/"], last_crawl=None): limit = 10 if last_crawl==None: limit = 100 i = 0 files = [] for path in paths: fn = "files/sourceforge/%d-%s-%d.rss"%(time.time(),project_id,i) try: ret = helper.open_url("http://sourceforge.net/api/file/index/project-id/%s/rss?path=%s"%(project_id,path),fn) except httplib.BadStatusLine: print "ERROR bad status" return [] except urllib2.URLError: print "ERROR UrlError" return [] if ret==None: print " ERROR" return [] pattern_file = re.compile("<link>http://sourceforge.net/projects/.*%2F(\S*)/download</link>") pattern_date = re.compile("<pubDate>(.*) [\+-][0-9]{4}</pubDate>") fs = [] for line in open(fn): tmp_fs = pattern_file.findall(line) if len(tmp_fs)>0: fs=tmp_fs ds = pattern_date.findall(line) if len(ds)>0: d = datetime.datetime.strptime(ds[0],"%a, %d %b %Y %H:%M:%S") for f in fs: files.append((f,d)) fs = [] i += 1 return files
# Ubuntu crawler: map each source package in intrepid to its binary packages.
def version_parser(version):
    # Debian/Ubuntu versions follow [epoch:]upstream[-debian_revision].
    # (The ":"-splitting branch is reconstructed; only the else-branch
    # survived in the source.)
    if ":" in version:
        epoch, rest = version.split(":", 1)
    else:
        rest = version
        epoch = 0
    if "-" in rest:
        rest, debv = rest.rsplit("-", 1)
    else:
        debv = 0
    return epoch, rest, debv


p_map = {}
for comp in ["main", "multiverse", "restricted", "universe"]:
    url = ("http://" + MIRROR + "/" + HTTP_START_DIR + "intrepid/" + comp +
           "/source/Sources.bz2")
    filename = ("files/ubuntu/Sources-intrepid-" + comp + "-" +
                str(time.time()) + ".bz2")
    info = helper.open_url(url, filename)
    pkgs = deb.parse_packages(version_parser, filename, url)
    for p in pkgs:
        p = eval(p[-1])  # the last field evaluates to the stanza as a dict
        if "Package" not in p or "Binary" not in p:
            continue
        if p["Package"] not in p_map:
            p_map[p["Package"]] = []
        p_map[p["Package"]] += p["Binary"].split(", ")

HOST, USER, PASSWORD, DB = helper.mysql_settings()
con = mysql.connect(host=HOST, user=USER, passwd=PASSWORD, db=DB)
if "-" in rest: rest, debv = rest.rsplit("-", 1) else: debv = 0 return epoch, rest, debv p_map = {} for comp in ["main", "multiverse", "restricted", "universe"]: url = "http://" + MIRROR + "/" + HTTP_START_DIR + "intrepid/" + comp + "/source/Sources.bz2" filename = "files/ubuntu/Sources-intrepid-" + comp + "-" + str( time.time()) + ".bz2" info = helper.open_url(url, filename) pkgs = deb.parse_packages(version_parser, filename, url) for p in pkgs: p = eval(p[-1]) if not p.has_key("Package") or not p.has_key("Binary"): continue if not p_map.has_key(p["Package"]): p_map[p["Package"]] = [] p_map[p["Package"]] += p["Binary"].split(", ") HOST, USER, PASSWORD, DB = helper.mysql_settings() con = mysql.connect(host=HOST, user=USER, passwd=PASSWORD, db=DB)