# -*- coding: utf-8 -*-
from .utils import helper
import re
import time
import datetime
from utils.db import upstream
from utils.types import UpstreamRelease
from utils.cache import Cache

NAME = "subversion"
source_id = upstream.source("subversion", "custom subversion crawler")


def get_date(m_d):
    """Build a datetime from date fragments stored in a dict.

    m_d must provide a non-empty "day", a "year", and either "smonth"
    (abbreviated month name, parsed with %b) or "month" (full month name,
    parsed with %B).  The values are presumably strings taken from a regex
    groupdict() -- confirm against the callers.  m_d is not modified.

    Returns a datetime.datetime on success.  Returns None when there is no
    day, when neither month key is usable, or when parsing fails; a parse
    failure is reported on stdout.
    """
    if not m_d.get("day"):
        return None

    if m_d.get("smonth"):
        month = m_d["smonth"]
        fmt = "%d %b %Y"
    elif "month" in m_d:
        month = m_d["month"]
        # special case: "Sept" is not a full month name, so %B rejects it
        if month == "Sept":
            month = "September"
        fmt = "%d %B %Y"
    else:
        return None

    try:
        return datetime.datetime.strptime(
            " ".join([m_d["day"], month, m_d["year"]]), fmt)
    except (KeyError, TypeError, ValueError) as e:
        # The original "except e:" looked up an undefined name and raised
        # NameError instead of logging; name the expected failures instead.
        # Parenthesised single-argument print behaves the same on Python 2.
        print("ERROR: parsing date failed: %s" % e)
    return None
sys.path.append(".") from utils import helper from utils import parsers import httplib import time import re import datetime import urllib2 from utils.db import sf as sf_module from utils.db import upstream from utils.cache import Cache NAME="sourceforge" source_id = upstream.source("sf", "generic sourceforge crawler") def get_files(project_id, paths=["/"], last_crawl=None): limit = 10 if last_crawl==None: limit = 100 i = 0 files = [] for path in paths: fn = "files/sourceforge/%d-%s-%d.rss"%(time.time(),project_id,i) try: ret = helper.open_url("http://sourceforge.net/api/file/index/project-id/%s/rss?path=%s"%(project_id,path),fn) except httplib.BadStatusLine: print "ERROR bad status" return []
# -*- coding: utf-8 -*-
import sys
sys.path.append(".")
from utils import helper
from utils import parsers
from utils.db import explore as explore_module
from utils.db import upstream
from utils.cache import Cache

NAME = "explore"
source_id = upstream.source("explore", "generic directory crawler")


def contains(s, parts):
    """Return True if at least one element of parts occurs in s, else False."""
    return any(fragment in s for fragment in parts)


def explore(url, depth, good, bad, fn_remove, badv, dead, last_crawl):
    pkgs = []
    print(url)
    info = helper.open_dir(url)
    if depth != None and depth > 0:
        new_depth = depth - 1
    elif depth == None:
        new_depth = None
# -*- coding: utf-8 -*-
import sys
sys.path.append(".")
from utils import helper
from utils import parsers
from utils.db import explore as explore_module
from utils.db import upstream
from utils.cache import Cache

NAME = "explore"
source_id = upstream.source("explore", "generic directory crawler")


def contains(s, parts):
    """Tell whether s contains any of the given parts.

    Returns True as soon as one element p of parts satisfies ``p in s``;
    returns False when none does (including when parts is empty).
    """
    return any(candidate in s for candidate in parts)


def explore(url, depth, good, bad, fn_remove, badv, dead, last_crawl):
    #print url
    pkgs = []
    info = helper.open_dir(url)
    if depth!=None and depth>0:
        new_depth = depth - 1
    elif depth==None:
        new_depth = None
    if info==None:
        return []
# -*- coding: utf-8 -*- from .utils import helper import re import time import datetime from utils.db import upstream from utils.types import UpstreamRelease from utils.cache import Cache NAME = "subversion" source_id = upstream.source("subversion", "custom subversion crawler") def get_date(m_d): if m_d.has_key("day") and m_d["day"]: if m_d.has_key("smonth") and m_d["smonth"]: try: return datetime.datetime.strptime( " ".join([m_d["day"], m_d["smonth"], m_d["year"]]), "%d %b %Y") except e: print "ERROR: parsing date failed: %s" % e elif m_d.has_key("month"): #special case if m_d["month"] == "Sept": m_d["month"] = "September" try: return datetime.datetime.strptime( " ".join([m_d["day"], m_d["month"], m_d["year"]]), "%d %B %Y") except e:
# -*- coding: utf-8 -*- import sys sys.path.append(".") from utils import helper import re import time import datetime from utils.db import upstream from utils.types import UpstreamRelease from utils.cache import Cache NAME = "mysql" source_id = upstream.source("mysql", "custom mysql crawler") VERSIONS = ["mysql-5.1", "mysql-5.0", "mysql-4.1", "mysql-6.0"] def get_date(m_d): if m_d.has_key("day") and m_d["day"]: if m_d.has_key("smonth") and m_d["smonth"]: try: return datetime.datetime.strptime(" ".join([m_d["day"], m_d["smonth"], m_d["year"]]), "%d %b %Y") except e: print "ERROR: parsing date failed: %s" % e elif m_d.has_key("month"): # special case if m_d["month"] == "Sept": m_d["month"] = "September" try: return datetime.datetime.strptime(" ".join([m_d["day"], m_d["month"], m_d["year"]]), "%d %B %Y") except e:
# -*- coding: utf-8 -*-
import datetime
import sys
sys.path.append(".")
from utils import helper
from utils import parsers
from utils.db import upstream
from utils.cache import Cache

NAME = "php"
source_id = upstream.source("php", "custom php crawler")
MIRROR = "http://us3.php.net"


def print_helper(a, depth=0):
    """Recursively print a (possibly nested) dict to stdout.

    For a dict, each key and then its value is printed one level deeper
    than the dict itself.  Any other object is printed as str(a), indented
    by depth spaces.

    a     -- the object to print
    depth -- current nesting level, used as the indentation width
    """
    # isinstance (rather than type(a) == dict) so dict subclasses are
    # walked instead of being dumped as a single string.
    if isinstance(a, dict):
        for key in a:
            print_helper(key, depth + 1)
            print_helper(a[key], depth + 1)
    else:
        # Parenthesised single-argument print behaves the same on Python 2.
        print(" " * depth + str(a))


def _deserialize(tokens):
    first = tokens.pop(0)
    result = None
    if first == "a":
        result = {}
# -*- coding: utf-8 -*- import datetime import sys sys.path.append(".") from utils import helper from utils import parsers from utils.db import upstream from utils.cache import Cache NAME="php" source_id = upstream.source("php", "custom php crawler") MIRROR="http://us3.php.net" def print_helper(a,depth=0): #print type(a),a if type(a) == dict: for key in a: print_helper(key, depth+1) print_helper(a[key], depth+1) else: print " "*depth+str(a) def _deserialize(tokens): first = tokens.pop(0) result = None if first=="a": result = {} length = int(tokens.pop(0)) for i in range(length):