Ejemplo n.º 1
0
def checkExchangeDirs(logmail, logtype, daycode):
    """Warn about exchanges whose HDFS directory for one day is missing or sparse.

    Lists ``/data/<logtype>/<daycode>/`` with ``hadoop fs -ls`` and, for every
    exchange returned by ``SynchUtil.getExchanges()`` that is not in
    ``noDataOkaySet()``, checks that the exchange directory appears in that
    listing and that listing the directory itself yields at least
    ``FILE_COUNT_WARN_CUTOFF`` lines.

    Args:
        logmail: Object whose ``addLogLine(message)`` receives each warning.
        logtype: Log type, used as the second path component.
        daycode: Day identifier, used as the third path component.

    Returns:
        The number of warnings that were logged.
    """
    hadcall = "hadoop fs -ls /data/%s/%s/" % (logtype, daycode)
    hadlines = SynchUtil.sysCallResult(hadcall)
    warncount = 0

    for excname in SynchUtil.getExchanges():

        if excname in noDataOkaySet():
            continue

        dirpath = "/data/%s/%s/%s" % (logtype, daycode, excname)

        # Compare against the end of each listing line instead of doing a
        # substring search: otherwise exchange "foo" would be considered
        # present whenever a directory such as "foo2" is listed.
        if not any(hline.rstrip().endswith(dirpath) for hline in hadlines):
            logmail.addLogLine("Warning: directory not found: %s" % (dirpath))
            warncount += 1
            continue  # nothing to count inside a missing directory

        subhadls = "hadoop fs -ls %s" % (dirpath)
        subfiles = SynchUtil.sysCallResult(subhadls)

        # NOTE(review): this counts listing lines, which may include a
        # summary/header line emitted by ``hadoop fs -ls`` -- confirm that the
        # cutoff accounts for it.
        if len(subfiles) < FILE_COUNT_WARN_CUTOFF:
            logmail.addLogLine("Warning: found only %d files for exchange %s" %
                               (len(subfiles), excname))
            warncount += 1

    return warncount
Ejemplo n.º 2
0
def checkExchangeDirs(logmail, logtype, daycode):
	"""Count and log exchanges with a missing or under-filled HDFS day directory.

	The parent directory ``/data/<logtype>/<daycode>/`` is listed once via
	``hadoop fs -ls``. Each exchange from ``SynchUtil.getExchanges()`` that is
	not in ``noDataOkaySet()`` must appear in that listing, and a listing of
	its own directory must have at least ``FILE_COUNT_WARN_CUTOFF`` lines.

	Args:
		logmail: Object whose ``addLogLine(message)`` receives each warning.
		logtype: Log type, used as the second path component.
		daycode: Day identifier, used as the third path component.

	Returns:
		The number of warnings that were logged.
	"""
	hadcall = "hadoop fs -ls /data/%s/%s/" % (logtype, daycode)
	hadlines = SynchUtil.sysCallResult(hadcall)
	warncount = 0

	for excname in SynchUtil.getExchanges():

		if excname in noDataOkaySet():
			continue

		dirpath = "/data/%s/%s/%s" % (logtype, daycode, excname)

		# A plain substring test would treat exchange "foo" as present when
		# only "foo2" is listed, so require the line to end with the path.
		if not any(hline.rstrip().endswith(dirpath) for hline in hadlines):
			logmail.addLogLine("Warning: directory not found: %s" % (dirpath))
			warncount += 1
			continue # nothing to count inside a missing directory

		subhadls = "hadoop fs -ls %s" % (dirpath)
		subfiles = SynchUtil.sysCallResult(subhadls)

		# NOTE(review): this counts listing lines, which may include a
		# summary/header line emitted by ``hadoop fs -ls`` -- confirm that
		# the cutoff accounts for it.
		if len(subfiles) < FILE_COUNT_WARN_CUTOFF:
			logmail.addLogLine("Warning: found only %d files for exchange %s" % (len(subfiles), excname))
			warncount += 1

	return warncount
Ejemplo n.º 3
0
def createHdfsDirs(pixset):
	"""Create a directory under ``HDFS_ADA_DIR`` for each entry that lacks one.

	``HDFS_ADA_DIR`` is listed once with ``hadoop fs -ls``. For every element
	of ``pixset`` the name returned by ``pixprefFromDir`` is looked up in that
	listing; if it is absent, ``hadoop fs -mkdir`` is run for
	``<HDFS_ADA_DIR>/<name>``. Progress is printed to stdout.

	Args:
		pixset: Iterable of values accepted by ``pixprefFromDir``.
	"""
	extantdirs = SynchUtil.sysCallResult("hadoop fs -ls %s" % HDFS_ADA_DIR)

	for onepix in pixset:

		pixpref = pixprefFromDir(onepix)

		# Match the final path component exactly. A substring test would
		# report a name as found when it is merely part of a longer existing
		# name, and its directory would then never be created.
		dirsuffix = "/" + pixpref

		if any(exline.rstrip().endswith(dirsuffix) for exline in extantdirs):
			print("Found directory %s" % (pixpref))
		else:
			hadmkdir = "hadoop fs -mkdir %s/%s" % (HDFS_ADA_DIR, pixpref)
			print("Mkdir call is %s" % (hadmkdir))
			os.system(hadmkdir)
Ejemplo n.º 4
0
def getHDsizeMap(adex, logtype, daycode):
    """Map file names to sizes for the ``.gz`` entries of one HDFS directory.

    Runs ``hadoop fs -ls /data/<logtype>/<daycode>/<adex>/`` through
    ``SynchUtil.sysCallResult`` and inspects every returned line that contains
    ``.gz``; all other lines are ignored.

    Args:
        adex: Last path component of the directory to list.
        logtype: Second path component.
        daycode: Third path component.

    Returns:
        A dict whose keys are the final ``/``-separated component of
        whitespace-delimited field 7 of each matching line, and whose values
        are field 4 of that line converted to ``int``.
    """
    listcmd = "hadoop fs -ls /data/%s/%s/%s/" % (logtype, daycode, adex)
    listing = SynchUtil.sysCallResult(listcmd)

    sizes = {}
    for entry in listing:
        if '.gz' not in entry:
            continue

        fields = entry.split()
        basename = fields[7].rsplit('/', 1)[-1]
        sizes[basename] = int(fields[4])

    return sizes