def setupManiFile(daycode):
    """Build the manifest file of log paths and their sizes for one day.

    For every exchange from SynchUtil.getExchanges() and every log type from
    SynchUtil.getBigLogList(), the path -> value mapping returned by
    LogFileDataSize.getHdfsPathList() is merged into a single manifest. One
    summary line is printed per exchange/log-type pair. The merged manifest
    is then written to getManiFilePath(daycode), one tab-separated
    "path, value" line per entry, and a final summary line is printed.

    NOTE(review): a second definition of this function with the same logic
    follows later in the visible source and rebinds the name, so this copy
    is shadowed once the module has loaded.

    Args:
        daycode: day identifier, passed through unchanged to getBatchDir,
            LogFileDataSize.getHdfsPathList and getManiFilePath.

    Raises:
        OSError: if the batch directory is missing and cannot be created.
    """
    # Divisor used for the "gb" figures in the summary lines; the mapped
    # values are presumably byte counts -- confirm against LogFileDataSize.
    gbdivisor = 1000000000

    # Create the directory in-process instead of through a shell "mkdir":
    # the path is not word-split by a shell, and a failure is raised rather
    # than lost in an ignored exit status.
    batchdir = getBatchDir(daycode)
    if not os.path.exists(batchdir):
        try:
            os.makedirs(batchdir)
        except OSError:
            # Tolerate another process creating the directory between the
            # exists() check and makedirs(); anything else is a real error.
            if not os.path.isdir(batchdir):
                raise

    manimap = {}
    for onex in SynchUtil.getExchanges():
        for logtype in SynchUtil.getBigLogList():
            sizemap = LogFileDataSize.getHdfsPathList(onex, logtype, daycode)
            for onefile in sizemap:
                manimap[onefile] = sizemap[onefile]

            # Floor division keeps the whole-number result that the %d
            # format expects on both Python 2 and Python 3.
            sizegb = sum(sizemap.values()) // gbdivisor
            # A parenthesized single argument is valid both as the Python 2
            # print statement and as the Python 3 print function.
            print("Found %d files, %d gb for %s %s %s" % (
                len(sizemap), sizegb, onex, logtype, daycode))

    # The context manager closes the manifest even if a write fails.
    with open(getManiFilePath(daycode), 'w') as fhandle:
        for onefile in manimap:
            fhandle.write(onefile + "\t" + str(manimap[onefile]) + "\n")

    print("Finished building manifest, %d total files and %d gb size" % (
        len(manimap), sum(manimap.values()) // gbdivisor))
def setupManiFile(daycode):
    """Build the manifest file of log paths and their sizes for one day.

    For every exchange from SynchUtil.getExchanges() and every log type from
    SynchUtil.getBigLogList(), the path -> value mapping returned by
    LogFileDataSize.getHdfsPathList() is merged into a single manifest. One
    summary line is printed per exchange/log-type pair. The merged manifest
    is then written to getManiFilePath(daycode), one tab-separated
    "path, value" line per entry, and a final summary line is printed.

    NOTE(review): a definition with the same name and logic appears earlier
    in the visible source; this later one is the definition that remains
    bound once the module has loaded.

    Args:
        daycode: day identifier, passed through unchanged to getBatchDir,
            LogFileDataSize.getHdfsPathList and getManiFilePath.

    Raises:
        OSError: if the batch directory is missing and cannot be created.
    """
    # Divisor used for the "gb" figures in the summary lines; the mapped
    # values are presumably byte counts -- confirm against LogFileDataSize.
    gbdivisor = 1000000000

    # Create the directory in-process instead of through a shell "mkdir":
    # the path is not word-split by a shell, and a failure is raised rather
    # than lost in an ignored exit status.
    batchdir = getBatchDir(daycode)
    if not os.path.exists(batchdir):
        try:
            os.makedirs(batchdir)
        except OSError:
            # Tolerate another process creating the directory between the
            # exists() check and makedirs(); anything else is a real error.
            if not os.path.isdir(batchdir):
                raise

    manimap = {}
    for onex in SynchUtil.getExchanges():
        for logtype in SynchUtil.getBigLogList():
            sizemap = LogFileDataSize.getHdfsPathList(onex, logtype, daycode)
            for onefile in sizemap:
                manimap[onefile] = sizemap[onefile]

            # Floor division keeps the whole-number result that the %d
            # format expects on both Python 2 and Python 3.
            sizegb = sum(sizemap.values()) // gbdivisor
            # A parenthesized single argument is valid both as the Python 2
            # print statement and as the Python 3 print function.
            print("Found %d files, %d gb for %s %s %s" % (
                len(sizemap), sizegb, onex, logtype, daycode))

    # The context manager closes the manifest even if a write fails.
    with open(getManiFilePath(daycode), 'w') as fhandle:
        for onefile in manimap:
            fhandle.write(onefile + "\t" + str(manimap[onefile]) + "\n")

    print("Finished building manifest, %d total files and %d gb size" % (
        len(manimap), sum(manimap.values()) // gbdivisor))
def checkExchangeDirs(logmail, logtype, daycode):
    """Check that each exchange has a populated directory for a log type and day.

    Lists /data/<logtype>/<daycode>/ with "hadoop fs -ls", then for every
    exchange from SynchUtil.getExchanges() that is not in noDataOkaySet():

    * records a warning on logmail if /data/<logtype>/<daycode>/<exchange>
      does not appear in that listing; otherwise
    * lists the exchange directory and records a warning if that listing
      has fewer than FILE_COUNT_WARN_CUTOFF lines.

    NOTE(review): a second definition of this function with the same logic
    follows later in the visible source and rebinds the name, so this copy
    is shadowed once the module has loaded.

    Args:
        logmail: object whose addLogLine(text) method receives each warning.
        logtype: log type, used as a path component under /data.
        daycode: day identifier, used as a path component under the log type.

    Returns:
        The number of warnings recorded.
    """
    hadcall = "hadoop fs -ls /data/%s/%s/" % (logtype, daycode)
    hadlines = SynchUtil.sysCallResult(hadcall)
    warncount = 0

    for excname in SynchUtil.getExchanges():
        if excname in noDataOkaySet():
            continue

        dirpath = "/data/%s/%s/%s" % (logtype, daycode, excname)

        # Match the path against the end of a whole whitespace-separated
        # field instead of as a bare substring. Otherwise a sibling
        # directory whose name merely starts with this exchange's name
        # (".../abc2" when looking for ".../abc") would hide a missing
        # directory. endswith() still accepts a scheme/host prefix in front
        # of the path.
        dirfound = any(field.endswith(dirpath)
                       for hline in hadlines
                       for field in hline.split())
        if not dirfound:
            logmail.addLogLine("Warning: directory not found: %s" % (dirpath))
            warncount += 1
            continue  # no point in listing a directory that is not there

        subhadls = "hadoop fs -ls %s" % (dirpath)
        subfiles = SynchUtil.sysCallResult(subhadls)

        # NOTE(review): this counts the lines returned for the listing; if
        # the output carries a header line it is counted too -- confirm the
        # cutoff accounts for that.
        if len(subfiles) < FILE_COUNT_WARN_CUTOFF:
            logmail.addLogLine("Warning: found only %d files for exchange %s" % (len(subfiles), excname))
            warncount += 1

    return warncount
def checkExchangeDirs(logmail, logtype, daycode):
    """Check that each exchange has a populated directory for a log type and day.

    Lists /data/<logtype>/<daycode>/ with "hadoop fs -ls", then for every
    exchange from SynchUtil.getExchanges() that is not in noDataOkaySet():

    * records a warning on logmail if /data/<logtype>/<daycode>/<exchange>
      does not appear in that listing; otherwise
    * lists the exchange directory and records a warning if that listing
      has fewer than FILE_COUNT_WARN_CUTOFF lines.

    NOTE(review): a definition with the same name and logic appears earlier
    in the visible source; this later one is the definition that remains
    bound once the module has loaded.

    Args:
        logmail: object whose addLogLine(text) method receives each warning.
        logtype: log type, used as a path component under /data.
        daycode: day identifier, used as a path component under the log type.

    Returns:
        The number of warnings recorded.
    """
    hadcall = "hadoop fs -ls /data/%s/%s/" % (logtype, daycode)
    hadlines = SynchUtil.sysCallResult(hadcall)
    warncount = 0

    for excname in SynchUtil.getExchanges():
        if excname in noDataOkaySet():
            continue

        dirpath = "/data/%s/%s/%s" % (logtype, daycode, excname)

        # Match the path against the end of a whole whitespace-separated
        # field instead of as a bare substring. Otherwise a sibling
        # directory whose name merely starts with this exchange's name
        # (".../abc2" when looking for ".../abc") would hide a missing
        # directory. endswith() still accepts a scheme/host prefix in front
        # of the path.
        dirfound = any(field.endswith(dirpath)
                       for hline in hadlines
                       for field in hline.split())
        if not dirfound:
            logmail.addLogLine("Warning: directory not found: %s" % (dirpath))
            warncount += 1
            continue  # no point in listing a directory that is not there

        subhadls = "hadoop fs -ls %s" % (dirpath)
        subfiles = SynchUtil.sysCallResult(subhadls)

        # NOTE(review): this counts the lines returned for the listing; if
        # the output carries a header line it is counted too -- confirm the
        # cutoff accounts for that.
        if len(subfiles) < FILE_COUNT_WARN_CUTOFF:
            logmail.addLogLine("Warning: found only %d files for exchange %s" % (len(subfiles), excname))
            warncount += 1

    return warncount