def crawl_repo(repo):
    """Download a FreeBSD package INDEX file and parse it into releases.

    The INDEX URL is assembled from ``CRAWL_DIR`` plus the repo's
    ``architecture``, ``codename`` and ``branch``. The download target is a
    timestamped file under ``files/freebsd/``.

    Args:
        repo: repository record. The attributes read here are ``component``,
            ``architecture``, ``codename``, ``branch``, ``last_crawl`` and
            ``id``.

    Returns:
        A ``(t, rels)`` tuple. ``t`` is whatever ``helper.open_url`` returned,
        or ``repo.last_crawl`` if that was ``None``. ``rels`` is a list of
        ``DownstreamRelease`` objects, one per parsed INDEX line; it is empty
        when ``helper.open_url`` returned a falsy value.
    """
    rels = []
    fn = "".join(
        ("files/freebsd/", str(repo.component), "-", str(time.time()), ".txt"))
    url = "".join((CRAWL_DIR, repo.architecture, "/packages-",
                   str(repo.codename), "-", repo.branch, "/INDEX"))
    # NOTE(review): open_url presumably saves `url` to `fn` and returns a
    # timestamp (None when there is nothing new) -- confirm against helper.
    t = helper.open_url(url, fn, repo.last_crawl)
    if t:
        # `with` guarantees the file is closed even if a line fails to parse.
        with open(fn) as f:
            for line in f:
                # Only the first "|"-separated field ("name-version") is used.
                name = line.split("|", 1)[0]
                if "-" not in name:
                    # Blank or malformed line: there is no version to split
                    # off, so skip it instead of failing the whole crawl.
                    continue
                pkg, ver = name.rsplit("-", 1)
                if "," in ver:
                    # Drop the suffix after the last comma
                    # (presumably the FreeBSD port epoch).
                    ver = ver.rsplit(",", 1)[0]
                if "_" in ver:
                    # The suffix after the last underscore is the revision.
                    ver, rev = ver.rsplit("_", 1)
                else:
                    rev = 0
                rel = DownstreamRelease()
                rel.repo_id = repo.id
                rel.package = pkg
                rel.version = ver
                rel.revision = rev
                rel.released = t
                rels.append(rel)
    if t is None:
        t = repo.last_crawl
    return (t, rels)
def crawl_repo(repo):
    """Download a FreeBSD package INDEX file and parse it into releases.

    The INDEX URL is assembled from ``CRAWL_DIR`` plus the repo's
    ``architecture``, ``codename`` and ``branch``. The download target is a
    timestamped file under ``files/freebsd/``.

    Args:
        repo: repository record. The attributes read here are ``component``,
            ``architecture``, ``codename``, ``branch``, ``last_crawl`` and
            ``id``.

    Returns:
        A ``(t, rels)`` tuple. ``t`` is whatever ``helper.open_url`` returned,
        or ``repo.last_crawl`` if that was ``None``. ``rels`` is a list of
        ``DownstreamRelease`` objects, one per parsed INDEX line; it is empty
        when ``helper.open_url`` returned a falsy value.
    """
    rels = []
    fn = "".join(("files/freebsd/", str(repo.component), "-", str(time.time()), ".txt"))
    url = "".join((CRAWL_DIR, repo.architecture, "/packages-", str(repo.codename), "-", repo.branch, "/INDEX"))
    # NOTE(review): open_url presumably saves `url` to `fn` and returns a
    # timestamp (None when there is nothing new) -- confirm against helper.
    t = helper.open_url(url, fn, repo.last_crawl)
    if t:
        # `with` guarantees the file is closed even if a line fails to parse.
        with open(fn) as f:
            for line in f:
                # Only the first "|"-separated field ("name-version") is used.
                name = line.split("|", 1)[0]
                if "-" not in name:
                    # Blank or malformed line: there is no version to split
                    # off, so skip it instead of failing the whole crawl.
                    continue
                pkg, ver = name.rsplit("-", 1)
                if "," in ver:
                    # Drop the suffix after the last comma
                    # (presumably the FreeBSD port epoch).
                    ver = ver.rsplit(",", 1)[0]
                if "_" in ver:
                    # The suffix after the last underscore is the revision.
                    ver, rev = ver.rsplit("_", 1)
                else:
                    rev = 0
                rel = DownstreamRelease()
                rel.repo_id = repo.id
                rel.package = pkg
                rel.version = ver
                rel.revision = rev
                rel.released = t
                rels.append(rel)
    if t is None:
        t = repo.last_crawl
    return (t, rels)
Beispiel #3
0
def get_files(project_id, last_crawl=None):
    """List the files announced in a SourceForge project's file RSS feed.

    The feed is fetched through ``helper.open_url`` into a timestamped file
    under ``files/sourceforge/`` and then scanned line by line.

    Args:
        project_id: SourceForge project id, interpolated into the feed URL.
        last_crawl: accepted for interface compatibility; not used here.

    Returns:
        A list of ``(filename, datetime)`` tuples, pairing each file name
        found in a ``<link>`` line with the next ``<pubDate>`` that follows
        it. An empty list is returned when the download fails.
    """
    fn = "files/sourceforge/%d-%s.rss" % (time.time(), project_id)
    try:
        ret = helper.open_url(
            "http://sourceforge.net/api/file/index/project-id/%s/rss" % (project_id,),
            fn)
    except httplib.BadStatusLine:
        print("ERROR bad status")
        return []
    except urllib2.URLError:
        print("ERROR UrlError")
        return []

    if ret is None:
        print(" ERROR")
        return []

    # Raw strings keep the regex escapes (\S, \+) away from the string parser.
    pattern_file = re.compile(r"<link>http://sourceforge.net/projects/.*/(\S*)/download</link>")
    pattern_date = re.compile(r"<pubDate>(.*) [\+-][0-9]{4}</pubDate>")

    files = []
    pending = []  # file names seen since the last <pubDate> line
    with open(fn) as feed:
        for line in feed:
            names = pattern_file.findall(line)
            if names:
                pending = names
            dates = pattern_date.findall(line)
            if dates:
                # The numeric UTC offset is cut off by the pattern, so the
                # resulting datetime is naive.
                d = datetime.datetime.strptime(dates[0], "%a, %d %b %Y %H:%M:%S")
                for name in pending:
                    files.append((name, d))
                # Each file name is paired with one date only.
                pending = []
    return files
def get_releases(last_crawl=None):
    """Scrape the MySQL download archive pages for source tarball releases.

    For every entry of ``VERSIONS`` the archive listing is fetched through
    ``helper.open_url`` into a timestamped file under ``files/mysql/`` and
    scanned for ``mysql-<version>.tar.gz`` links.

    Args:
        last_crawl: passed to ``helper.open_url``; when not ``None``, only
            releases whose date is greater than it are returned.

    Returns:
        A list of ``UpstreamRelease`` objects with ``package`` set to
        ``"mysql"``, ``version`` taken from the link and ``released`` taken
        from ``get_date``.
    """
    pkgs = []
    # These pattern fragments do not depend on the version being crawled.
    date = r"(?P<day>[0-9][0-9]?) ((?P<smonth>[A-Z][a-z][a-z])|(?P<month>[A-Z][a-z]+)) (?P<year>[0-9]{4})"
    version = r"(?P<version>[0-9\.\-a-z]+)"
    for ver in VERSIONS:
        filename = "files/mysql/list-" + ver + "-" + str(time.time()) + ".html"
        info = helper.open_url("http://downloads.mysql.com/archives.php?p=" + ver + "&o=other", filename, last_crawl)
        if info is None:
            # NOTE(review): this stops the whole crawl, so the remaining
            # VERSIONS are not visited -- confirm `continue` is not intended.
            return pkgs

        line_pattern = re.compile(
            r'<a href="/archives/%s/mysql-%s\.tar\.gz"><strong>mysql-[0-9\.\-a-z]+.tar.gz</strong> \(%s, .*\)</a><br />'
            % (ver, version, date)
        )
        # `with` closes the listing file even if parsing raises.
        with open(filename) as changes:
            for line in changes:
                m = line_pattern.search(line.strip("\n"))
                if not m:
                    continue
                rel = UpstreamRelease()
                rel.package = "mysql"
                # groupdict() always holds every named group (None when the
                # group did not take part), so no key checks are needed.
                m_d = m.groupdict()
                rel.version = m_d["version"]
                rel.released = get_date(m_d)
                if rel.version and rel.released and (last_crawl is None or rel.released > last_crawl):
                    pkgs.append(rel)
    return pkgs
Beispiel #5
0
def get_releases(last_crawl=None):
    """Collect PHP releases from the mirror's serialized release index.

    For each major version in ``range(4, 8)`` the release index is fetched
    through ``helper.open_url`` into ``files/helper/php<version>.txt``, then
    decoded with ``deserialize`` and ``flatten``.

    Args:
        last_crawl: accepted for interface compatibility; not used here.

    Returns:
        A list of the objects returned by ``parsers.parse_filename``, each
        with ``released`` set to its date from the index. Entries that do not
        parse, or whose version contains ``"pl"``, are left out.
    """
    pkgs = []
    for version in range(4, 8):
        fn = "files/helper/php%d.txt" % version
        helper.open_url(MIRROR + "/releases/index.php?serialize=1&version=%d&max=3000" % version, fn)
        # `with` closes the file even if the read raises.
        with open(fn) as index_file:
            s = index_file.read()

        entries = flatten(deserialize(s))
        if entries is None:  # no filenames found
            continue

        # Distinct loop names so `fn` (the cache file path) is not shadowed.
        for date, filename in entries:
            rel = parsers.parse_filename(filename)
            if rel is not None and "pl" not in rel.version:
                rel.released = date
                pkgs.append(rel)
    return pkgs
def get_releases(last_crawl=None):
    """Collect PHP releases from the mirror's serialized release index.

    For each major version in ``range(4, 8)`` the release index is fetched
    through ``helper.open_url`` into ``files/helper/php<version>.txt``, then
    decoded with ``deserialize`` and ``flatten``.

    Args:
        last_crawl: accepted for interface compatibility; not used here.

    Returns:
        A list of the objects returned by ``parsers.parse_filename``, each
        with ``released`` set to its date from the index. Entries that do not
        parse, or whose version contains ``"pl"``, are left out.
    """
    pkgs = []
    for version in range(4, 8):
        fn = "files/helper/php%d.txt" % version
        helper.open_url(
            MIRROR +
            "/releases/index.php?serialize=1&version=%d&max=3000" % version,
            fn)
        # `with` closes the file even if the read raises.
        with open(fn) as index_file:
            s = index_file.read()

        entries = flatten(deserialize(s))
        if entries is None:  # no filenames found
            continue

        # Distinct loop names so `fn` (the cache file path) is not shadowed.
        for date, filename in entries:
            rel = parsers.parse_filename(filename)
            if rel is not None and "pl" not in rel.version:
                rel.released = date
                pkgs.append(rel)
    return pkgs
Beispiel #7
0
def get_files(project_id, paths=("/",), last_crawl=None):
    """List the files announced in a SourceForge project's file RSS feeds.

    One feed is fetched per entry of ``paths`` through ``helper.open_url``,
    each into its own timestamped file under ``files/sourceforge/``, and
    scanned line by line.

    Args:
        project_id: SourceForge project id, interpolated into the feed URL.
        paths: iterable of path strings, each sent as the ``path`` query
            parameter of one feed request.
        last_crawl: accepted for interface compatibility; not used here.

    Returns:
        A list of ``(filename, datetime)`` tuples across all paths, pairing
        each file name found in a ``<link>`` line with the next ``<pubDate>``
        that follows it. An empty list is returned as soon as any download
        fails, discarding results from earlier paths.
    """
    # Compiled once; the patterns do not depend on the path.
    # Raw strings keep the regex escapes (\S, \+) away from the string parser.
    pattern_file = re.compile(r"<link>http://sourceforge.net/projects/.*%2F(\S*)/download</link>")
    pattern_date = re.compile(r"<pubDate>(.*) [\+-][0-9]{4}</pubDate>")

    files = []
    # enumerate() gives every path its own index, so two feeds fetched within
    # the same second no longer share one cache file name.
    for i, path in enumerate(paths):
        fn = "files/sourceforge/%d-%s-%d.rss" % (time.time(), project_id, i)
        try:
            ret = helper.open_url(
                "http://sourceforge.net/api/file/index/project-id/%s/rss?path=%s" % (project_id, path),
                fn)
        except httplib.BadStatusLine:
            print("ERROR bad status")
            return []
        except urllib2.URLError:
            print("ERROR UrlError")
            return []

        if ret is None:
            print(" ERROR")
            return []

        pending = []  # file names seen since the last <pubDate> line
        with open(fn) as feed:
            for line in feed:
                names = pattern_file.findall(line)
                if names:
                    pending = names
                dates = pattern_date.findall(line)
                if dates:
                    # The numeric UTC offset is cut off by the pattern, so
                    # the resulting datetime is naive.
                    d = datetime.datetime.strptime(dates[0], "%a, %d %b %Y %H:%M:%S")
                    for name in pending:
                        files.append((name, d))
                    # Each file name is paired with one date only.
                    pending = []
    return files
  else:
    rest = version
    epoch = 0

  if "-" in rest:
    rest, debv = rest.rsplit("-",1)
  else:
    debv = 0
  return epoch, rest, debv
# Maps each "Package" name from the Sources indexes to the list of names in
# its "Binary" field, accumulated over all components.
p_map = {}

for comp in ["main","multiverse","restricted","universe"]:
  url = "http://" + MIRROR + "/" + HTTP_START_DIR + "intrepid/" + comp + "/source/Sources.bz2"
  filename = "files/ubuntu/Sources-intrepid-" + comp + "-" + str(time.time()) + ".bz2"

  info = helper.open_url(url, filename)
  pkgs = deb.parse_packages(version_parser, filename, url)

  for p in pkgs:
    # SECURITY NOTE(review): eval() runs text derived from a downloaded
    # index. If the last element is a plain dict literal, ast.literal_eval
    # would be the safe replacement -- confirm what deb.parse_packages emits.
    p = eval(p[-1])
    if "Package" not in p or "Binary" not in p:
      continue

    # "Binary" is a ", "-separated list of package names.
    p_map.setdefault(p["Package"], []).extend(p["Binary"].split(", "))

HOST, USER, PASSWORD, DB = helper.mysql_settings()

con = mysql.connect(host=HOST,user=USER,passwd=PASSWORD,db=DB)
Beispiel #9
0
    if "-" in rest:
        rest, debv = rest.rsplit("-", 1)
    else:
        debv = 0
    return epoch, rest, debv


# Maps each "Package" name from the Sources indexes to the list of names in
# its "Binary" field, accumulated over all components.
p_map = {}

for comp in ["main", "multiverse", "restricted", "universe"]:
    url = "http://" + MIRROR + "/" + HTTP_START_DIR + "intrepid/" + comp + "/source/Sources.bz2"
    filename = "files/ubuntu/Sources-intrepid-" + comp + "-" + str(
        time.time()) + ".bz2"

    info = helper.open_url(url, filename)
    pkgs = deb.parse_packages(version_parser, filename, url)

    for p in pkgs:
        # SECURITY NOTE(review): eval() runs text derived from a downloaded
        # index. If the last element is a plain dict literal,
        # ast.literal_eval would be the safe replacement -- confirm what
        # deb.parse_packages emits.
        p = eval(p[-1])
        if "Package" not in p or "Binary" not in p:
            continue

        # "Binary" is a ", "-separated list of package names.
        p_map.setdefault(p["Package"], []).extend(p["Binary"].split(", "))

HOST, USER, PASSWORD, DB = helper.mysql_settings()

con = mysql.connect(host=HOST, user=USER, passwd=PASSWORD, db=DB)