def logFailureList():
    """Run FINDER_CLASS against LZO_PATTERN with a dated log path.

    The path is built under /var/log/cronlogs/hdfs/lzoindexer/ using the
    value returned by SynchUtil.get_today(), and is handed to the Hadoop
    call as its second argument.
    NOTE(review): presumably the finder writes its results to that path;
    confirm against the finder class.
    """
    logpath = "/var/log/cronlogs/hdfs/lzoindexer/nolzolist_%s.txt" % (SynchUtil.get_today())
    finderargs = [LZO_PATTERN, logpath]
    hadJavaCall.runHadoopCall(FINDER_CLASS, finderargs)
def runLzoPass():
    """Run the local LZO indexer on every path listed by the finder job.

    First runs FINDER_CLASS with LZO_PATTERN and a temp file under
    USER_HOME, then reads that file line by line and shells out to
    ``hadoop jar LZO_JAR_PATH LOCAL_INDEXER_CLASS <path>`` for each entry.

    A failing indexer call is reported but does not stop the pass, so the
    remaining paths are still processed.
    """
    noindexfile = "%s/noindexfile.txt.tmp" % (USER_HOME)
    hadJavaCall.runHadoopCall(FINDER_CLASS, [LZO_PATTERN, noindexfile])

    # Use a context manager so the list file is closed even if a call raises.
    with open(noindexfile) as pathlist:
        for rawline in pathlist:
            # Drop the trailing newline so it does not end up inside the
            # shell command or the log message; skip blank lines entirely.
            onehdfspath = rawline.strip()
            if not onehdfspath:
                continue

            lochadcall = "hadoop jar %s %s %s" % (LZO_JAR_PATH, LOCAL_INDEXER_CLASS, onehdfspath)
            print("Local had call is %s" % (lochadcall,))

            exitcall = os.system(lochadcall)
            if exitcall != 0:
                # Previously the status was stored and never inspected.
                print("Local had call failed with status %s: %s" % (exitcall, lochadcall))
#!/usr/bin/python

import re, os, sys, fileinput

# TODO: this is not a good way to do this
sys.path.append("/home/burfoot/src/python/codeutil")
sys.path.append("/home/burfoot/src/python/shared")

import hadJavaCall, ArgMap

if __name__ == "__main__":
    # Runs the UserIndexPrecompute job followed by the LearningTool job,
    # forwarding the command-line arguments unchanged to both.

    argmap = ArgMap.getClArgMap(sys.argv)

    # Explicit check instead of `assert`: assertions are stripped under
    # `python -O`, which would let the jobs start without a blockend.
    if not argmap.containsKey("blockend"):
        sys.exit("Must specify blockend")

    print("Argmap is %s" % (argmap,))

    jobargs = sys.argv[1:]
    for jobclass in ("UserIndexPrecompute", "LearningTool"):
        hadJavaCall.runHadoopCall(jobclass, jobargs)
if __name__ == "__main__":
    # Runs ScoreUserJob once per shard id in [startpart, endpart), then
    # FinalListJob twice: once plain and once with chart=true.
    #
    # Command-line keys read from the ArgMap:
    #   blockend  - required; forwarded to every job as blockend=<value>
    #   printonly - boolean, default False; passed through to runHadoopCall
    #   startpart - int, default 0
    #   endpart   - int, default 24 (exclusive upper bound)

    argmap = ArgMap.getClArgMap(sys.argv)

    # Explicit check instead of `assert`: under `python -O` the assert would
    # vanish and the jobs would run without any blockend argument.
    if not argmap.containsKey("blockend"):
        sys.exit("Must include explicit block-end date")

    printonly = argmap.getBoolean("printonly", False)
    startpart = argmap.getInt("startpart", 0)
    endpart = argmap.getInt("endpart", 24)

    print("Printonly is %r" % (printonly,))

    # blockend is guaranteed present here, so no second containsKey guard.
    basearglist = ["blockend=%s" % (argmap.getString("blockend", "xxxnoval"))]

    for shardid in range(startpart, endpart):
        hadJavaCall.runHadoopCall("ScoreUserJob", [str(shardid)] + basearglist, printonly)

    # Final list creation
    finalclass = "FinalListJob"
    hadJavaCall.runHadoopCall(finalclass, basearglist, printonly)

    # Create lift report
    hadJavaCall.runHadoopCall(finalclass, basearglist + ["chart=true"], printonly)
#!/usr/bin/python

import sys, os, LocalConf, hadJavaCall

if __name__ == "__main__":
    # Thin command-line wrapper: the first argument is the class to run,
    # everything after it is forwarded as that class's argument list, and
    # the call is made with grabhost fixed to gandalf.adnetik.com.
    jclass = sys.argv[1]
    jargs = sys.argv[2:]
    hadJavaCall.runHadoopCall(jclass, jargs, grabhost="gandalf.adnetik.com")
sys.path.append("/local/src/python/util")
sys.path.append("/local/src/python/shared")

import Util
import hadJavaCall

if __name__ == "__main__":
    # Run both the Hadoop mode aggregation AND the KVUploader for every
    # day code listed (one per line) in the file given as the first argument.

    if len(sys.argv) < 2:
        print("HadAggNUpload <daylist.txt>")
        sys.exit(1)

    daycodefile = sys.argv[1]

    # Context manager closes the file; the set removes duplicate day codes.
    with open(daycodefile) as dayfile:
        dayset = set(oneline.strip() for oneline in dayfile)

    # A blank line would otherwise launch both jobs with an empty day code.
    dayset.discard("")

    # Sorted so the prompt and the run order are deterministic.
    daylist = sorted(dayset)

    if not Util.promptOkay("Okay to run for %s?" % (",".join(daylist))):
        print("Aborting")
        # Without this exit the jobs below ran even after the operator
        # declined the prompt.
        sys.exit(1)

    for oneday in daylist:
        hadJavaCall.runHadoopCall("HadoopMode", [oneday])
        hadJavaCall.runHadoopCall("KvUploader", [oneday])
# Runs ScoreUserJob once per shard id in [startpart, endpart), then
# FinalListJob twice: once plain and once with chart=true.
#
# Command-line keys read from the ArgMap:
#   blockend  - required; forwarded to every job as blockend=<value>
#   printonly - boolean, default False; passed through to runHadoopCall
#   startpart - int, default 0
#   endpart   - int, default 24 (exclusive upper bound)
argmap = ArgMap.getClArgMap(sys.argv)

# Explicit check instead of `assert`: under `python -O` the assert would
# vanish and the jobs would run without any blockend argument.
if not argmap.containsKey("blockend"):
    sys.exit("Must include explicit block-end date")

printonly = argmap.getBoolean("printonly", False)
startpart = argmap.getInt("startpart", 0)
endpart = argmap.getInt("endpart", 24)

print("Printonly is %r" % (printonly,))

# blockend is guaranteed present here, so no second containsKey guard.
basearglist = ["blockend=%s" % (argmap.getString("blockend", "xxxnoval"))]

for shardid in range(startpart, endpart):
    hadJavaCall.runHadoopCall("ScoreUserJob", [str(shardid)] + basearglist, printonly)

# Final list creation
finalclass = "FinalListJob"
hadJavaCall.runHadoopCall(finalclass, basearglist, printonly)

# Create lift report
hadJavaCall.runHadoopCall(finalclass, basearglist + ["chart=true"], printonly)
import re, os, sys, fileinput, JavaCall, hadJavaCall

sys.path.append('/local/src/python/shared')

import Util

if __name__ == "__main__":
    # Runs one Hadoop call of <jclass> per line of <arglist.txt>. Each line
    # becomes the first argument of its call; any further command-line
    # arguments are appended to every call.

    if len(sys.argv) < 3:
        print("MultiHadCall <jclass> <arglist.txt>")
        sys.exit(1)

    jclass = sys.argv[1]
    fullclass = JavaCall.getFullClass(jclass)

    # Context manager closes the file; blank lines are dropped so they do
    # not launch a job with an empty argument.
    with open(sys.argv[2]) as argfile:
        stripped = [onearg.strip() for onearg in argfile]
    arglist = [onearg for onearg in stripped if onearg]

    # Empty when nothing follows the arglist file on the command line.
    extraargs = sys.argv[3:]

    if Util.promptOkay("Going to run %s for %s?" % (fullclass, ",".join(arglist))):
        for onearg in arglist:
            hadJavaCall.runHadoopCall(jclass, [onearg] + extraargs)
#!/usr/bin/python
#xxxxxxxxxx

import re, os, sys, fileinput

sys.path.append("/local/src/python/util")

import hadJavaCall

if __name__ == "__main__":
    # Runs the HadoopMode backfill for a bunch of argument days.
    # Day codes are read from stdin, one per line; each line is stripped of
    # surrounding whitespace and duplicates collapse because a set is used.
    dayset = set(oneline.strip() for oneline in sys.stdin)

    for oneday in dayset:
        hadJavaCall.runHadoopCall("HadoopMode", [oneday])