def checkExchangeDirs(logmail, logtype, daycode):
    """Warn about exchanges whose HDFS directory for one day is missing or sparse.

    Lists ``/data/<logtype>/<daycode>/`` once with ``hadoop fs -ls``.  For every
    exchange returned by ``SynchUtil.getExchanges()`` that is not in
    ``noDataOkaySet()``, two checks are made:

    * ``/data/<logtype>/<daycode>/<exchange>`` must appear in that listing;
    * listing that directory must yield at least ``FILE_COUNT_WARN_CUTOFF``
      lines.

    Each failed check adds one warning line to *logmail*.

    Args:
        logmail: object exposing ``addLogLine(str)``; receives the warnings.
        logtype: log-type component of the HDFS path.
        daycode: day component of the HDFS path.

    Returns:
        The number of warnings added to *logmail*.
    """
    hadcall = "hadoop fs -ls /data/%s/%s/" % (logtype, daycode)
    hadlines = SynchUtil.sysCallResult(hadcall)

    # Fetched once: the exemption set is not modified inside this loop.
    exempt = noDataOkaySet()

    warncount = 0
    for excname in SynchUtil.getExchanges():
        if excname in exempt:
            continue

        dirpath = "/data/%s/%s/%s" % (logtype, daycode, excname)

        # Match the path at the END of a listing line instead of anywhere in
        # it; a plain substring test would treat ".../abc" as present when
        # only ".../abcd" exists.  Assumes the path is the last column of each
        # line, as in `hadoop fs -ls` output.
        if not any(hline.rstrip().endswith(dirpath) for hline in hadlines):
            logmail.addLogLine("Warning: directory not found: %s" % (dirpath))
            warncount += 1
            continue  # nothing to count inside a directory that is absent

        subhadls = "hadoop fs -ls %s" % (dirpath)
        subfiles = SynchUtil.sysCallResult(subhadls)

        # NOTE(review): this counts the lines returned by sysCallResult; if
        # those include hadoop's "Found N items" header the count is one
        # higher than the real number of files -- confirm.
        if len(subfiles) < FILE_COUNT_WARN_CUTOFF:
            logmail.addLogLine("Warning: found only %d files for exchange %s" % (len(subfiles), excname))
            warncount += 1

    return warncount
def createHdfsDirs(pixset):
    """Create a directory under ``HDFS_ADA_DIR`` for each prefix not already there.

    ``HDFS_ADA_DIR`` is listed once with ``hadoop fs -ls``.  Every entry of
    *pixset* is mapped to a prefix with ``pixprefFromDir``; if the prefix
    occurs in any line of the listing it is reported as found, otherwise
    ``hadoop fs -mkdir`` is run for ``HDFS_ADA_DIR/<prefix>``.

    Args:
        pixset: iterable of values accepted by ``pixprefFromDir``.

    Returns:
        None.  Progress is printed to stdout.
    """
    extantdirs = SynchUtil.sysCallResult("hadoop fs -ls %s" % HDFS_ADA_DIR)

    # The listing above is a one-time snapshot, so without this set two
    # entries that share a prefix would each launch another mkdir.
    handled = set()

    for onepix in pixset:
        pixpref = pixprefFromDir(onepix)
        if pixpref in handled:
            continue
        handled.add(pixpref)

        if any(pixpref in exline for exline in extantdirs):
            # Single-argument parenthesised print behaves the same whether
            # print is a statement or a function.
            print("Found directory %s" % (pixpref))
        else:
            hadmkdir = "hadoop fs -mkdir %s/%s" % (HDFS_ADA_DIR, pixpref)
            print("Mkdir call is %s" % (hadmkdir))
            os.system(hadmkdir)
def getHDsizeMap(adex, logtype, daycode):
    """Map each ``.gz`` file listed for one exchange/day to its size.

    Runs ``hadoop fs -ls /data/<logtype>/<daycode>/<adex>/`` and, for every
    output line containing ``.gz``, takes the fifth whitespace-separated
    field as the size and the last ``/``-separated component of the eighth
    field as the file name.

    Args:
        adex: exchange component of the HDFS path.
        logtype: log-type component of the HDFS path.
        daycode: day component of the HDFS path.

    Returns:
        dict mapping file name (str) to size (int).  Lines that contain
        ``.gz`` but do not have the expected eight fields, or whose size
        field is not an integer, are skipped.
    """
    sizeMap = {}
    hadlscall = "hadoop fs -ls /data/%s/%s/%s/" % (logtype, daycode, adex)
    hadooplines = SynchUtil.sysCallResult(hadlscall)

    for line in hadooplines:
        if '.gz' not in line:
            continue

        toks = line.split()
        # Lines that mention ".gz" without the full column layout (for
        # example an error message naming a path) used to raise IndexError.
        if len(toks) < 8:
            continue

        try:
            filesize = int(toks[4])
        except ValueError:
            # Fifth field is not a number, so this is not a file entry.
            continue

        filename = toks[7].split('/')[-1]
        sizeMap[filename] = filesize

    return sizeMap