Ejemplo n.º 1
0
def setupManiFile(daycode):
    """Build the manifest file listing every big-log HDFS path for a day.

    For each exchange / log-type pair, the path-to-size mapping returned by
    LogFileDataSize.getHdfsPathList is merged into a single dictionary.
    That dictionary is then written to getManiFilePath(daycode), one
    "<path><TAB><size>" line per file.  A summary line is printed for each
    pair and once more for the overall total.

    Args:
        daycode: Day identifier, passed unchanged to the path and listing
            helpers.

    Raises:
        OSError: If the batch directory does not exist and cannot be
            created.
    """
    batchdir = getBatchDir(daycode)

    # Create the directory in-process instead of through a shell "mkdir"
    # string: that form breaks on paths containing spaces or shell
    # metacharacters, cannot create missing parents, and hides failures.
    if not os.path.exists(batchdir):
        os.makedirs(batchdir)

    manimap = {}

    for onex in SynchUtil.getExchanges():
        for logtype in SynchUtil.getBigLogList():
            sizemap = LogFileDataSize.getHdfsPathList(onex, logtype, daycode)
            manimap.update(sizemap)

            # Explicit floor division so the whole-number figure does not
            # depend on the interpreter's "/" semantics.
            # NOTE(review): sizes are presumably bytes -- confirm.
            sizegb = sum(sizemap.values()) // 1000000000

            print("Found %d files, %d gb for %s %s %s" % (
                len(sizemap), sizegb, onex, logtype, daycode))

    # The context manager closes the manifest even if a write fails.
    with open(getManiFilePath(daycode), 'w') as fhandle:
        for onefile in manimap:
            fhandle.write(onefile + "\t" + str(manimap[onefile]) + "\n")

    print("Finished building manifest, %d total files and %d gb size" % (
        len(manimap), sum(manimap.values()) // 1000000000))
Ejemplo n.º 2
0
def setupManiFile(daycode):
	"""Build the manifest file listing every big-log HDFS path for a day.

	For each exchange / log-type pair, the path-to-size mapping returned by
	LogFileDataSize.getHdfsPathList is merged into a single dictionary.
	That dictionary is then written to getManiFilePath(daycode), one
	"<path><TAB><size>" line per file.  A summary line is printed for each
	pair and once more for the overall total.

	Args:
		daycode: Day identifier, passed unchanged to the path and listing
			helpers.

	Raises:
		OSError: If the batch directory does not exist and cannot be
			created.
	"""
	batchdir = getBatchDir(daycode)

	# Create the directory in-process instead of through a shell "mkdir"
	# string: that form breaks on paths containing spaces or shell
	# metacharacters, cannot create missing parents, and hides failures.
	if not os.path.exists(batchdir):
		os.makedirs(batchdir)

	manimap = {}

	for onex in SynchUtil.getExchanges():
		for logtype in SynchUtil.getBigLogList():
			sizemap = LogFileDataSize.getHdfsPathList(onex, logtype, daycode)
			manimap.update(sizemap)

			# Explicit floor division so the whole-number figure does not
			# depend on the interpreter's "/" semantics.
			# NOTE(review): sizes are presumably bytes -- confirm.
			sizegb = sum(sizemap.values()) // 1000000000

			print("Found %d files, %d gb for %s %s %s" % (len(sizemap), sizegb, onex, logtype, daycode))

	# The context manager closes the manifest even if a write fails.
	with open(getManiFilePath(daycode), 'w') as fhandle:
		for onefile in manimap:
			fhandle.write(onefile + "\t" + str(manimap[onefile]) + "\n")

	print("Finished building manifest, %d total files and %d gb size" % (len(manimap), sum(manimap.values()) // 1000000000))
Ejemplo n.º 3
0
def checkExchangeDirs(logmail, logtype, daycode):
    """Check that each exchange has a populated HDFS directory for a day.

    Lists /data/<logtype>/<daycode>/ once.  Then, for every exchange from
    SynchUtil.getExchanges() that is not in noDataOkaySet(), verifies that
    the exchange's sub-directory appears in that listing and that listing
    the sub-directory yields at least FILE_COUNT_WARN_CUTOFF lines.  Each
    problem found is reported through logmail.addLogLine.

    Args:
        logmail: Object exposing addLogLine(str); receives warning text.
        logtype: Log-type component of the HDFS path.
        daycode: Day component of the HDFS path.

    Returns:
        The number of warnings added to logmail.
    """
    hadcall = "hadoop fs -ls /data/%s/%s/" % (logtype, daycode)
    hadlines = SynchUtil.sysCallResult(hadcall)

    # Loop-invariant: fetch the exemption set once, not once per exchange.
    okayset = noDataOkaySet()
    warncount = 0

    for excname in SynchUtil.getExchanges():

        if excname in okayset:
            continue

        dirpath = "/data/%s/%s/%s" % (logtype, daycode, excname)

        # Match the path at the end of a listing line rather than as a bare
        # substring; otherwise ".../ab" is reported as present whenever
        # ".../abc" exists.
        # NOTE(review): assumes each listing line ends with the entry's
        # path, as in "hadoop fs -ls" output -- confirm for sysCallResult.
        if not any(hline.rstrip().endswith(dirpath) for hline in hadlines):
            logmail.addLogLine("Warning: directory not found: %s" % (dirpath))
            warncount += 1
            continue  # nothing to list when the directory is missing

        subhadls = "hadoop fs -ls %s" % (dirpath)
        subfiles = SynchUtil.sysCallResult(subhadls)

        # NOTE(review): this counts every line sysCallResult returns, which
        # may include a listing header -- confirm against the cutoff.
        if len(subfiles) < FILE_COUNT_WARN_CUTOFF:
            logmail.addLogLine("Warning: found only %d files for exchange %s" %
                               (len(subfiles), excname))
            warncount += 1

    return warncount
Ejemplo n.º 4
0
def checkExchangeDirs(logmail, logtype, daycode):
	"""Check that each exchange has a populated HDFS directory for a day.

	Lists /data/<logtype>/<daycode>/ once.  Then, for every exchange from
	SynchUtil.getExchanges() that is not in noDataOkaySet(), verifies that
	the exchange's sub-directory appears in that listing and that listing
	the sub-directory yields at least FILE_COUNT_WARN_CUTOFF lines.  Each
	problem found is reported through logmail.addLogLine.

	Args:
		logmail: Object exposing addLogLine(str); receives warning text.
		logtype: Log-type component of the HDFS path.
		daycode: Day component of the HDFS path.

	Returns:
		The number of warnings added to logmail.
	"""
	hadcall = "hadoop fs -ls /data/%s/%s/" % (logtype, daycode)
	hadlines = SynchUtil.sysCallResult(hadcall)

	# Loop-invariant: fetch the exemption set once, not once per exchange.
	okayset = noDataOkaySet()
	warncount = 0

	for excname in SynchUtil.getExchanges():

		if excname in okayset:
			continue

		dirpath = "/data/%s/%s/%s" % (logtype, daycode, excname)

		# Match the path at the end of a listing line rather than as a bare
		# substring; otherwise ".../ab" is reported as present whenever
		# ".../abc" exists.
		# NOTE(review): assumes each listing line ends with the entry's
		# path, as in "hadoop fs -ls" output -- confirm for sysCallResult.
		if not any(hline.rstrip().endswith(dirpath) for hline in hadlines):
			logmail.addLogLine("Warning: directory not found: %s" % (dirpath))
			warncount += 1
			continue # nothing to list when the directory is missing

		subhadls = "hadoop fs -ls %s" % (dirpath)
		subfiles = SynchUtil.sysCallResult(subhadls)

		# NOTE(review): this counts every line sysCallResult returns, which
		# may include a listing header -- confirm against the cutoff.
		if len(subfiles) < FILE_COUNT_WARN_CUTOFF:
			logmail.addLogLine("Warning: found only %d files for exchange %s" % (len(subfiles), excname))
			warncount += 1

	return warncount