Code example #1
# -*- coding: utf-8 -*-
import sys
sys.path.append(".")

from utils import helper
import re
import time
import datetime
from utils.db import upstream
from utils.types import UpstreamRelease
from utils.cache import Cache

NAME = "subversion"
source_id = upstream.source("subversion", "custom subversion crawler")

def get_date(m_d):
    # Build a datetime from a regex match dict with day/month/year groups.
    if "day" in m_d and m_d["day"]:
        if "smonth" in m_d and m_d["smonth"]:
            # abbreviated month name, e.g. "17 Mar 2010"
            try:
                return datetime.datetime.strptime(" ".join([m_d["day"], m_d["smonth"], m_d["year"]]), "%d %b %Y")
            except ValueError as e:
                print "ERROR: parsing date failed: %s" % e
        elif "month" in m_d:
            # special case: strptime knows "September" but not "Sept"
            if m_d["month"] == "Sept":
                m_d["month"] = "September"
            # full month name, e.g. "17 March 2010"
            try:
                return datetime.datetime.strptime(" ".join([m_d["day"], m_d["month"], m_d["year"]]), "%d %B %Y")
            except ValueError as e:
                print "ERROR: parsing date failed: %s" % e
        else:
            return None
    return None
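
For context, a minimal usage sketch of get_date. The regex and the sample string below are hypothetical stand-ins for whatever pattern the crawler actually matches, but they produce the named groups (day, smonth, year) the function expects:

# Hypothetical pattern providing the named groups get_date reads.
pattern = re.compile(r"(?P<day>\d{1,2}) (?P<smonth>[A-Z][a-z]{2}) (?P<year>\d{4})")
m = pattern.search("Released 17 Mar 2010")
if m:
    print get_date(m.groupdict())  # -> 2010-03-17 00:00:00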
Code example #2
import sys
sys.path.append(".")

from utils import helper
from utils import parsers
import httplib
import time
import re
import datetime
import urllib2
from utils.db import sf as sf_module
from utils.db import upstream
from utils.cache import Cache

NAME="sourceforge"

source_id = upstream.source("sf", "generic sourceforge crawler")

def get_files(project_id, paths=["/"], last_crawl=None):
	limit = 10
	if last_crawl==None:
		limit = 100
	
	i = 0
	files = []
	for path in paths:
		fn = "files/sourceforge/%d-%s-%d.rss"%(time.time(),project_id,i)
		try:
			ret = helper.open_url("http://sourceforge.net/api/file/index/project-id/%s/rss?path=%s"%(project_id,path),fn)
		except httplib.BadStatusLine:
			print "ERROR bad status"
			return []
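
A hedged sketch of how this crawler could be invoked; the numeric project id is made up for illustration. Passing last_crawl=None marks a first crawl, which raises the limit from 10 to 100:

# Hypothetical call; "23067" stands in for a real SourceForge project id.
rss_files = get_files("23067", paths=["/"], last_crawl=None)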
Code example #3
# -*- coding: utf-8 -*-
import sys
sys.path.append(".")

from utils import helper
from utils import parsers
from utils.db import explore as explore_module
from utils.db import upstream
from utils.cache import Cache

NAME="explore"
source_id = upstream.source("explore", "generic directory crawler")

def contains(s, parts):
	for p in parts:
		if p in s:
			return True
	return False

def explore(url, depth, good, bad, fn_remove, badv, dead, last_crawl):
	#print url
	pkgs = []
	info = helper.open_dir(url)
	
	if depth!=None and depth>0:
		new_depth = depth - 1
	elif depth==None:
		new_depth = None
	
	if info==None:
		return []
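
contains is self-contained; a quick illustration of how it can flag directory entries (the file names and markers are made up):

print contains("foo-1.2.tar.gz", [".tar.gz", ".tgz"])  # True
print contains("index.html", [".tar.gz", ".tgz"])      # False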
Code example #4
# -*- coding: utf-8 -*-
import sys

sys.path.append(".")
from utils import helper
import re
import time
import datetime
from utils.db import upstream
from utils.types import UpstreamRelease
from utils.cache import Cache

NAME = "mysql"
source_id = upstream.source("mysql", "custom mysql crawler")
VERSIONS = ["mysql-5.1", "mysql-5.0", "mysql-4.1", "mysql-6.0"]


def get_date(m_d):
    # Same changelog date parser as the subversion crawler above.
    if "day" in m_d and m_d["day"]:
        if "smonth" in m_d and m_d["smonth"]:
            try:
                return datetime.datetime.strptime(" ".join([m_d["day"], m_d["smonth"], m_d["year"]]), "%d %b %Y")
            except ValueError as e:
                print "ERROR: parsing date failed: %s" % e
        elif "month" in m_d:
            # special case: strptime knows "September" but not "Sept"
            if m_d["month"] == "Sept":
                m_d["month"] = "September"
            try:
                return datetime.datetime.strptime(" ".join([m_d["day"], m_d["month"], m_d["year"]]), "%d %B %Y")
            except ValueError as e:
                print "ERROR: parsing date failed: %s" % e
    return None
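
A quick check of the "Sept" special case handled above; the match dict is built by hand for illustration:

print get_date({"day": "3", "month": "Sept", "year": "2008"})
# -> 2008-09-03 00:00:00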
Code example #5
File: php.py Project: alerque/open-source-watershed
# -*- coding: utf-8 -*-
import datetime
import sys
sys.path.append(".")

from utils import helper
from utils import parsers
from utils.db import upstream
from utils.cache import Cache

NAME="php"
source_id = upstream.source("php", "custom php crawler")

MIRROR="http://us3.php.net"

def print_helper(a,depth=0):
	#print type(a),a
	if type(a) == dict:
		for key in a:
			print_helper(key, depth+1)
			print_helper(a[key], depth+1)
	else:
		print "  "*depth+str(a)

def _deserialize(tokens):
	first = tokens.pop(0)
	result = None
	if first=="a":
		result = {}
		length = int(tokens.pop(0))
		for i in range(length):
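
print_helper is self-contained; a minimal illustration with made-up data:

print_helper({"5.3": {"5.3.0": "stable"}})
# output:
#   5.3
#     5.3.0
#     stable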