hadcall = "hadoop fs -cat %s | gunzip | %s > %s" % (filename, javacall, batchfile) #print hadcall os.system(hadcall) if __name__ == "__main__": if not len(sys.argv) == 3: print "Usage BatchSort.py daycode a/b/c" sys.exit(1) daycode = sys.argv[1] if "yest" == daycode: daycode = SynchUtil.get_yesterday() (proc_id, num_proc, num_batch) = [int(x) for x in sys.argv[2].split("/")] if int(proc_id) == 0: setupManiFile(daycode) # TODO: don't use temp file, instead hadoop cat into another program # that does the scrubbing, and direct output to the batch file. print "proc=%d, num=%d, nbatch=%d" % (proc_id, num_proc, num_batch) manimap = getManiData(daycode) # TODO: prefilter files so that we can print "finished with file 52/652..." targlist = [ onefile for onefile in manimap if ((hash(onefile) % num_batch) % num_proc) == proc_id
#!/usr/bin/python

###################################################
# Just a wrapper for the two SliceInterest jobs
# Want these to run back to back, hard to do without a wrapper
###################################################

import os
import sys

import SynchUtil


def runCall(dmclass, daycode):
    """Run one com.adnetik.data_management job through ``hadoop jar``.

    dmclass -- class name appended to the com.adnetik.data_management package
    daycode -- day identifier handed to the job as its only argument

    Returns the status value from os.system (0 means the command reported
    success). A non-zero status is printed so a failed job is visible in
    the wrapper's output instead of looking like a clean run.
    """
    hadcall = "hadoop jar /mnt/jars/adnetik.jar com.adnetik.data_management.%s %s" % (dmclass, daycode)

    # Single-argument print(...) behaves the same as the print statement
    # on Python 2 and also parses on Python 3.
    print("\nHadoop call is : \n\t%s" % (hadcall))

    status = os.system(hadcall)
    if status != 0:
        print("\nWARNING: %s returned non-zero status %d" % (dmclass, status))

    print("\nFinished with %s" % (dmclass))
    return status


if __name__ == "__main__":
    # this is ALWAYS going to run using "yesterday"
    daycode = SynchUtil.get_yesterday()

    # Both jobs are always attempted, in this order, even when the first
    # one reports a failure.
    for dmclass in ("SliceInterestActivity", "SliceInterestSecond"):
        runCall(dmclass, daycode)
os.system(hadcall) print "... done" if __name__ == "__main__": daylist = [] if len(sys.argv) < 2: print "Usage UploadExelate.py <daycode|daylist.txt>" sys.exit(1) singarg = sys.argv[1] if singarg.endswith(".txt"): gimp = [daylist.append(oneday.strip()) for oneday in open(singarg)] elif singarg == "yest": daylist.append(SynchUtil.get_yesterday()) else: daylist.append(singarg) for daycode in daylist: grabExelateFile(daycode) upload2hdfs(daycode)
continue # no point in continuing subhadls = "hadoop fs -ls %s" % (dirpath) subfiles = SynchUtil.sysCallResult(subhadls) if len(subfiles) < FILE_COUNT_WARN_CUTOFF: logmail.addLogLine("Warning: found only %d files for exchange %s" % (len(subfiles), excname)) warncount += 1 return warncount if __name__ == "__main__": """ Check to make sure that the daily Synch Jobs ran correctly. If they didn't, send an AdminMail """ daycode = SynchUtil.get_yesterday( ) if "yest" in sys.argv[1] else sys.argv[1] logmail = SimpleMail.SimpleMail("File Synch Check for %s" % (daycode)) warncount = 0 warncount += checkExchangeDirs(logmail, "no_bid", daycode) warncount += checkExchangeDirs(logmail, "bid_all", daycode) if warncount > 0: logmail.send2admin()
subhadls = "hadoop fs -ls %s" % (dirpath) subfiles = SynchUtil.sysCallResult(subhadls) if len(subfiles) < FILE_COUNT_WARN_CUTOFF: logmail.addLogLine("Warning: found only %d files for exchange %s" % (len(subfiles), excname)) warncount += 1 return warncount if __name__ == "__main__": """ Check to make sure that the daily Synch Jobs ran correctly. If they didn't, send an AdminMail """ daycode = SynchUtil.get_yesterday() if "yest" in sys.argv[1] else sys.argv[1] logmail = SimpleMail.SimpleMail("File Synch Check for %s" % (daycode)) warncount = 0 warncount += checkExchangeDirs(logmail, "no_bid", daycode) warncount += checkExchangeDirs(logmail, "bid_all", daycode) if warncount > 0: logmail.send2admin()
print "... done" if __name__ == "__main__": daylist = [] if len(sys.argv) < 2: print "Usage UploadExelate.py <daycode|daylist.txt>" sys.exit(1) singarg = sys.argv[1] if singarg.endswith(".txt"): gimp = [daylist.append(oneday.strip()) for oneday in open(singarg)] elif singarg == "yest": daylist.append(SynchUtil.get_yesterday()) else: daylist.append(singarg) for daycode in daylist: grabExelateFile(daycode) upload2hdfs(daycode) #pc_set = set() #for x in targlist: # pc_set.add(x) # pc_set.add(pix_comp_map[x]) # #for onepix in pc_set: