예제 #1
0
def init():
	"""Run one update pass and rewrite ``stats.json``.

	Asks ``cron.check()`` whether the index changed. When it did, the index
	is parsed, the tracked files are refreshed through ``updateFiles`` and a
	new ``stats.json`` is written holding the update date plus the index
	entry of every tracked file.

	Exits the process with status 0 when nothing changed or when the index
	could not be parsed.
	"""
	status,data,update_date=cron.check()
	if not status:
		#No change exit silently
		sys.exit(0)
	#download index changed
	dic=parseIndex(data)#fetch json of data
	if dic is None:
		sys.exit(0)
	updateFiles(dic)
	# Look the tracked list up once instead of once per index entry; fall
	# back to an empty list if the lookup yields nothing.
	tracked=config.getConfig("tracking_files") or []
	stats={}
	stats['lastUpdated']=update_date.strip()
	stats["files"]={}
	for key in dic:
		if key in tracked:
			stats["files"][key]=dic[key].strip()
	stats["files"].pop("../",None) #remove parent index
	try:
		# The context manager flushes and closes the file before success is logged.
		with open("stats.json","w") as stats_file:
			stats_file.write(json.dumps(stats, indent=4, sort_keys=True))
		log.put("Stats updated","SUCCESS")
	except Exception:
		# Not a bare except: SystemExit and KeyboardInterrupt must still propagate.
		log.put("Cannot update stats","FAIL")

	log.headPut("Finished cron-dbpedia","SUCCESS")
예제 #2
0
파일: cron.py 프로젝트: SRMSE/cron-dbpedia
def loadStats():
    """Load ``stats.json`` into the module-level ``stats`` global.

    Logs "Read stats" with SUCCESS or FAIL. On any failure (for example a
    missing file or invalid JSON) the process exits with status 0.
    """
    global stats
    try:
        # The context manager closes the handle even when parsing fails.
        with open("stats.json", "r") as stats_file:
            stats = json.load(stats_file)
        log.put("Read stats", "SUCCESS")
    except Exception:
        log.put("Read stats", "FAIL")
        sys.exit(0)
예제 #3
0
파일: cron.py 프로젝트: SRMSE/cron-dbpedia
def loadStats():
	"""Read ``stats.json`` and store the decoded object in the global ``stats``.

	Logs "Read stats" as SUCCESS when the file was read and decoded, and as
	FAIL otherwise. After a failure the process exits with status 0.
	"""
	global stats
	try:
		# Close the file deterministically instead of relying on garbage collection.
		with open("stats.json","r") as stats_file:
			stats=json.loads(stats_file.read())
		log.put("Read stats","SUCCESS")
	except Exception:
		log.put("Read stats","FAIL")
		sys.exit(0)
예제 #4
0
def parseIndex(data):
	"""Build a ``{href: trailing text}`` mapping from an HTML index page.

	Only the first ``<pre>`` element of ``data`` is inspected. For every
	``<a>`` inside it the key is the stripped ``href`` attribute and the
	value is the text that directly follows the anchor, cut at the first tab
	and stripped.

	Returns the mapping (possibly empty), or ``None`` after logging an ERROR
	when the page has no ``<pre>`` element.
	"""
	log.put("Start parsing index","INFO")
	soup=bs(data)
	pres=soup.find("pre")
	if pres is None:
		log.put("Parsing error no pre tag","ERROR")
		return None
	dic={}
	for anchor in pres.findAll("a"):
		try:
			name=anchor.get("href").strip()
			value=anchor.next_sibling.split("\t")[0].strip()
		except (AttributeError,TypeError):
			# The anchor has no href, or is not directly followed by text.
			# Skip this entry instead of aborting the whole parse.
			log.put("Skipping malformed index entry","WARNING")
			continue
		dic[name]=value
	return dic
예제 #5
0
def dlfile(url):
    """Download ``url`` into the current directory and decompress it with bunzip2.

    The local file is named after the last path component of ``url``. The
    download is streamed to disk, then ``bunzip2 -d`` is run on the file.
    Progress and failures are reported through ``log.put``; an ``HTTPError``
    raised while downloading is caught and logged, not re-raised.
    """
    # Function-scope imports keep this block self-contained.
    import shutil
    import subprocess

    filename = os.path.basename(url)
    # Open the url
    try:
        f = urlopen(url)
        try:
            print("downloading " + url)

            # Open our local file for writing; copy in chunks so a large
            # download is never held in memory all at once.
            with open(filename, "wb") as local_file:
                shutil.copyfileobj(f, local_file)
        finally:
            f.close()
        log.put("Download success "+url,"SUCCESS")
        log.put("Starting decompressing","INFO")
        # An argument list (no shell) means the file name cannot be
        # interpreted as shell syntax.
        try:
            exit_code = subprocess.call(["bunzip2", "-d", filename])
        except OSError:
            # bunzip2 could not be started at all.
            exit_code = -1
        if exit_code == 0:
            log.put("File decompressed "+url,"SUCCESS")
        else:
            log.put("Decompression failed "+url,"ERROR")
    #handle errors
    except HTTPError as e:
        print("HTTP Error: %s %s" % (e.code, url))
        log.put("Download failed "+url,"ERROR")
예제 #6
0
파일: cron.py 프로젝트: SRMSE/cron-dbpedia
def downloadPage():
    """Fetch the index page and report whether it differs from the recorded one.

    Downloads the page at the configured "base_url" and compares the
    stripped "date" response header with ``stats['lastUpdated']``.

    Returns ``(True, html_data, last_update_date)`` when the two differ and
    ``(False, None, None)`` when they match or when anything fails.
    """
    global stats
    url = config.getConfig("base_url")
    try:
        response = u.urlopen(url)
        try:
            response_headers = response.info().dict
            html_data = response.read()
        finally:
            # Release the connection whether or not the read succeeded.
            response.close()
        log.put("Index page downloaded", "SUCCESS")
        # NOTE(review): the "date" header is normally the time of the
        # response, not of the content - confirm this comparison is intended.
        last_update_date = response_headers["date"].strip()
        # .get(): a stats file with no 'lastUpdated' entry counts as out of
        # date instead of being reported as a download failure.
        if stats.get('lastUpdated') != last_update_date:
            log.put("New version available", "INFO")
            return True, html_data, last_update_date
        log.put("New version not available", "INFO")
        return False, None, None
    except Exception:
        log.put("Index page failed to download", "FAIL")
        return False, None, None
예제 #7
0
파일: cron.py 프로젝트: SRMSE/cron-dbpedia
def downloadPage():
	"""Download the index page and tell whether a new version is available.

	The page at the configured "base_url" is fetched and its stripped "date"
	response header is compared with ``stats['lastUpdated']``.

	Returns ``(True, html_data, last_update_date)`` when the values differ,
	otherwise ``(False, None, None)``. Any failure is logged and also
	returns ``(False, None, None)``.
	"""
	global stats
	url=config.getConfig("base_url")
	try:
		response=u.urlopen(url)
		try:
			response_headers = response.info().dict
			html_data=response.read()
		finally:
			# Always close the response so the connection is not leaked.
			response.close()
		log.put("Index page downloaded","SUCCESS")
		# NOTE(review): "date" is usually the response time rather than a
		# modification time - verify that this is the header to compare.
		last_update_date=response_headers["date"].strip()
		# A stats object without 'lastUpdated' is treated as outdated rather
		# than raising KeyError into the download-failure handler.
		if stats.get('lastUpdated')!=last_update_date:
			log.put("New version available","INFO")
			return True,html_data,last_update_date
		log.put("New version not available","INFO")
		return False,None,None
	except Exception:
		log.put("Index page failed to download","FAIL")
		return False,None,None
예제 #8
0
def getConfig(key):
    """Return the configuration value stored under ``key``.

    A missing key is logged as a WARNING and yields ``None``.
    """
    try:
        value = c[key]
    except KeyError:
        log.put(key + " not present in config", "WARNING")
        value = None
    return value
예제 #9
0
import json, log, sys
# Parsed contents of config.json; read once at import time.
c = None
try:
    # The context manager closes the handle even if the JSON is invalid.
    with open("config.json", "r") as config_file:
        c = json.load(config_file)
    log.put("Read config", "SUCCESS")
except Exception:
    # Without a readable config nothing can run: log and exit with status 0.
    log.put("Read config", "FAIL")
    sys.exit(0)


def getConfig(key):
    """Look ``key`` up in the loaded configuration ``c``.

    Returns the stored value, or ``None`` after logging a WARNING when the
    key is absent.
    """
    try:
        found = c[key]
    except KeyError:
        message = key + " not present in config"
        log.put(message, "WARNING")
        return None
    else:
        return found
예제 #10
0
def getConfig(key):
	"""Fetch ``key`` from the configuration mapping ``c``.

	Returns the value when the key exists; otherwise logs a WARNING and
	returns ``None``.
	"""
	try:
		return c[key]
	except KeyError:
		# Fall through to the warning below.
		pass
	log.put(key+" not present in config","WARNING")
	return None
예제 #11
0
import json,log,sys
# Configuration decoded from config.json when this module is imported.
c=None
try:
	# Close the file deterministically instead of leaving it to the garbage collector.
	with open("config.json","r") as config_file:
		c=json.loads(config_file.read())
	log.put("Read config","SUCCESS")
except Exception:
	# An unreadable or invalid config is fatal: log it and exit with status 0.
	log.put("Read config","FAIL")
	sys.exit(0)
def getConfig(key):
	"""Return ``c[key]``, or ``None`` (with a WARNING log entry) if it is missing."""
	result=None
	try:
		result=c[key]
	except KeyError:
		warning=key+" not present in config"
		log.put(warning,"WARNING")
	return result
예제 #12
0
def updateFiles(dic):
	"""Download and parse every tracked file whose index entry changed.

	``dic`` maps file names to their current index entries. Each name in the
	configured "tracking_files" list is compared with
	``cron.stats["files"]``. A file is downloaded and parsed when the two
	entries differ, or when either side has no entry for it. Afterwards all
	``*.nt`` and ``*.ttl`` files in the working directory are removed.
	"""
	log.put("Downloading tracked files","INFO")
	# Fall back to an empty list so an absent "tracking_files" setting means
	# "nothing to track" instead of a crash in the loop.
	li=config.getConfig("tracking_files") or [] #list of files to be updated
	for l in li:
		# Guard only the comparison, so a KeyError raised while downloading
		# or parsing can no longer trigger a second download of the same file.
		try:
			changed=cron.stats["files"][l]!=dic[l]
			first_seen=False
		except KeyError:
			changed=True
			first_seen=True
		if not changed:
			continue
		url=config.getConfig("base_url")+l
		target=l.replace(".bz2","")
		prefix=l.split(".")[0]
		dlfile(url)
		log.put("Parsing "+url,"INFO")
		if first_seen:
			nt.parseURI(target,prefix)
		else:
			# NOTE(review): this path passes four arguments while the
			# first-seen path passes two - confirm parseURI's signature.
			nt.parseURI(target,prefix,target,prefix)
		log.put("Parsed "+url,"SUCCESS")
	log.put("Tracked files updated","SUCCESS")
	log.put("Deleting all files from cache","INFO")
	os.system("rm *.nt")
	os.system("rm *.ttl")
	log.put("Files deleted from cache","SUCCESS")
예제 #13
0
        print "downloading " + url

        # Open our local file for writing
        with open(os.path.basename(url), "wb") as local_file:
            local_file.write(f.read())
        log.put("Download success "+url,"SUCCESS")
        log.put("Starting decompressing","INFO")
        os.system("bunzip2 -d "+os.path.basename(url))
        log.put("File decompressed "+url,"SUCCESS")
    #handle errors
    except HTTPError, e:
        print "HTTP Error:", e.code, url
        log.put("Download failed "+url,"ERROR")
    except URLError, e:
        print "URL Error:", e.reason, url
        log.put("Download failed "+url,"ERROR")

def updateFiles(dic):
	log.put("Downloading tracked files","INFO")
	li=config.getConfig("tracking_files") #list of files to be updated
	for l in li:
		try:
			if(cron.stats["files"][l]!=dic[l]):
				#file changed
				dlfile(config.getConfig("base_url")+l)
				log.put("Parsing "+config.getConfig("base_url")+l,"INFO")
				nt.parseURI(l.replace(".bz2",""),l.split(".")[0],l.replace(".bz2",""),l.split(".")[0])
				log.put("Parsed "+config.getConfig("base_url")+l,"SUCCESS")
		except KeyError:
				dlfile(config.getConfig("base_url")+l)
				log.put("Parsing "+config.getConfig("base_url")+l,"INFO")