Esempio n. 1
0
def logFailureList():
    """Run FINDER_CLASS through hadJavaCall, targeting a dated log file.

    The output path lives under /var/log/cronlogs/hdfs/lzoindexer and is
    stamped with whatever SynchUtil.get_today() returns.  The finder is
    handed LZO_PATTERN and that path as its two arguments.
    NOTE(review): presumably the finder writes the list of un-indexed LZO
    files to the path -- confirm against the Java class.
    """
    failpath = "/var/log/cronlogs/hdfs/lzoindexer/nolzolist_%s.txt" % SynchUtil.get_today()
    hadJavaCall.runHadoopCall(FINDER_CLASS, [LZO_PATTERN, failpath])
Esempio n. 2
0
def runLzoPass():
    """Find HDFS paths via FINDER_CLASS and run the local indexer on each.

    FINDER_CLASS is invoked with LZO_PATTERN and a temp file under
    USER_HOME; every non-blank line of that file is then treated as one
    HDFS path and passed to ``hadoop jar LZO_JAR_PATH LOCAL_INDEXER_CLASS``.
    A non-zero exit status from a hadoop call is reported but does not
    stop the remaining paths from being processed.
    """
    noindexfile = "%s/noindexfile.txt.tmp" % (USER_HOME)

    hadJavaCall.runHadoopCall(FINDER_CLASS, [LZO_PATTERN, noindexfile])

    # Read the whole list up front so the file handle is closed before the
    # (potentially long-running) hadoop calls start.
    with open(noindexfile) as pathfile:
        hdfspaths = [oneline.strip() for oneline in pathfile]

    for onehdfspath in hdfspaths:

        # A blank line would otherwise launch the indexer with no path at all.
        if not onehdfspath:
            continue

        lochadcall = "hadoop jar %s %s %s" % (LZO_JAR_PATH, LOCAL_INDEXER_CLASS, onehdfspath)

        print("Local had call is %s" % (lochadcall))

        exitcall = os.system(lochadcall)

        if exitcall != 0:
            print("Local had call failed with status %s" % (exitcall,))
Esempio n. 3
0
#!/usr/bin/python

import re, os, sys, fileinput

# TODO: this is not a good way to do this
sys.path.append("/home/burfoot/src/python/codeutil")
sys.path.append("/home/burfoot/src/python/shared")

import hadJavaCall, ArgMap

if __name__ == "__main__":

    # The argument map is only used for validation and logging; the jobs
    # themselves receive the raw command-line arguments untouched.
    argmap = ArgMap.getClArgMap(sys.argv)
    assert argmap.containsKey("blockend"), "Must specify blockend"

    print("Argmap is %s" % (argmap))

    # Precompute first, then learn -- order matters, so keep it a tuple.
    for jobclass in ("UserIndexPrecompute", "LearningTool"):
        hadJavaCall.runHadoopCall(jobclass, sys.argv[1:])
Esempio n. 4
0
if __name__ == "__main__":

    # Parse the command line into an argument map; a block-end date is mandatory.
    argmap = ArgMap.getClArgMap(sys.argv)
    assert argmap.containsKey("blockend"), "Must include explicit block-end date"

    printonly = argmap.getBoolean("printonly", False)
    firstshard = argmap.getInt("startpart", 0)
    lastshard = argmap.getInt("endpart", 24)

    print("Printonly is %r" % (printonly))

    # Arguments shared by every job launched below.
    sharedargs = []
    if argmap.containsKey("blockend"):
        blockend = argmap.getString("blockend", "xxxnoval")
        sharedargs.append("blockend=%s" % (blockend))

    # One ScoreUserJob run per shard id in [startpart, endpart).
    for shardid in range(firstshard, lastshard):
        shardargs = [str(shardid)] + sharedargs
        hadJavaCall.runHadoopCall("ScoreUserJob", shardargs, printonly)

    # Final list creation, then the same class again with chart=true
    # to create the lift report.
    finalclass = "FinalListJob"
    hadJavaCall.runHadoopCall(finalclass, sharedargs, printonly)
    hadJavaCall.runHadoopCall(finalclass, sharedargs + ["chart=true"], printonly)
Esempio n. 5
0
#!/usr/bin/python

import sys, os, LocalConf, hadJavaCall

if __name__ == "__main__":

    # First command-line argument is the class to run; everything after it
    # is forwarded as that class's argument list.
    jclass = sys.argv[1]
    jargs = sys.argv[2:]

    hadJavaCall.runHadoopCall(jclass, jargs, grabhost="gandalf.adnetik.com")
Esempio n. 6
0
			
sys.path.append("/local/src/python/util")
sys.path.append("/local/src/python/shared")

import Util
import hadJavaCall

if __name__ == "__main__":

    # Run both the Hadoop mode aggregation AND the KVUploader for every
    # day code listed (one per line) in the file given on the command line.

    if len(sys.argv) < 2:
        print("HadAggNUpload <daylist.txt>")
        sys.exit(1)

    daycodefile = sys.argv[1]

    # Close the file as soon as the day codes are read.
    with open(daycodefile) as dayfile:
        dayset = set(oneline.strip() for oneline in dayfile)

    # Blank lines (e.g. a trailing newline) are not day codes.
    dayset.discard("")

    # Sorted so the prompt and the processing order are deterministic.
    daylist = sorted(dayset)

    if not Util.promptOkay("Okay to run for %s?" % (",".join(daylist))):
        print("Aborting")
        # Actually stop: previously the jobs ran even after a refusal.
        sys.exit(1)

    for oneday in daylist:
        hadJavaCall.runHadoopCall("HadoopMode", [oneday])
        hadJavaCall.runHadoopCall("KvUploader", [oneday])
Esempio n. 7
0
	# Parse the command line into an argument map; a block-end date is mandatory.
	argmap = ArgMap.getClArgMap(sys.argv)
	assert argmap.containsKey("blockend"), "Must include explicit block-end date"

	printonly = argmap.getBoolean("printonly", False)
	firstshard = argmap.getInt("startpart", 0)
	lastshard = argmap.getInt("endpart", 24)

	print("Printonly is %r" % (printonly))

	# Arguments shared by every job launched below.
	sharedargs = []
	if argmap.containsKey("blockend"):
		blockend = argmap.getString("blockend", "xxxnoval")
		sharedargs.append("blockend=%s" % (blockend))

	# One ScoreUserJob run per shard id in [startpart, endpart).
	for shardid in range(firstshard, lastshard):
		shardargs = [str(shardid)] + sharedargs
		hadJavaCall.runHadoopCall("ScoreUserJob", shardargs, printonly)

	# Final list creation, then the same class again with chart=true
	# to create the lift report.
	finalclass = "FinalListJob"
	hadJavaCall.runHadoopCall(finalclass, sharedargs, printonly)
	hadJavaCall.runHadoopCall(finalclass, sharedargs + ["chart=true"], printonly)
	
	


	
Esempio n. 8
0
import re, os, sys, fileinput, JavaCall, hadJavaCall

sys.path.append('/local/src/python/shared')

import Util

if __name__ == "__main__":

    # Run one Hadoop job class once per line of an argument file.
    # Usage: MultiHadCall <jclass> <arglist.txt> [extra args...]
    # Any extra command-line arguments are appended to every invocation.

    if len(sys.argv) < 3:
        print("MultiHadCall <jclass> <arglist.txt>")
        sys.exit(1)

    jclass = sys.argv[1]

    # Only used to show the resolved class name in the confirmation prompt.
    fullclass = JavaCall.getFullClass(jclass)

    # Close the file promptly and skip blank lines, which would otherwise
    # launch a job with an empty argument.
    with open(sys.argv[2]) as argfile:
        stripped = [oneline.strip() for oneline in argfile]
    arglist = [onearg for onearg in stripped if onearg]

    # Identical for every job, so computed once outside the loop.
    extraargs = sys.argv[3:]

    if Util.promptOkay("Going to run %s for %s?" %
                       (fullclass, ",".join(arglist))):

        for onearg in arglist:
            hadJavaCall.runHadoopCall(jclass, [onearg] + extraargs)
Esempio n. 9
0
#!/usr/bin/python

import re, os, sys, fileinput

# TODO: this is not a good way to do this
sys.path.append("/home/burfoot/src/python/codeutil")
sys.path.append("/home/burfoot/src/python/shared")

import hadJavaCall, ArgMap

if __name__ == "__main__":

    # Validate up front that a blockend argument was supplied; the parsed
    # map is otherwise only echoed for the log.
    argmap = ArgMap.getClArgMap(sys.argv)
    assert argmap.containsKey("blockend"), "Must specify blockend"

    print("Argmap is %s" % (argmap))

    # Run the precompute step and then the learning step, each with the
    # unmodified command-line arguments.
    jobclasses = ["UserIndexPrecompute", "LearningTool"]
    for onejob in jobclasses:
        hadJavaCall.runHadoopCall(onejob, sys.argv[1:])
Esempio n. 10
0
import re, os, sys, fileinput, JavaCall, hadJavaCall
			
sys.path.append('/local/src/python/shared')

import Util
			
if __name__ == "__main__":

    # Run one Hadoop job class once per line of an argument file.
    # Usage: MultiHadCall <jclass> <arglist.txt> [extra args...]
    # Any extra command-line arguments are appended to every invocation.

    if len(sys.argv) < 3:
        print("MultiHadCall <jclass> <arglist.txt>")
        sys.exit(1)

    jclass = sys.argv[1]

    # Only used to show the resolved class name in the confirmation prompt.
    fullclass = JavaCall.getFullClass(jclass)

    # Close the file promptly and skip blank lines, which would otherwise
    # launch a job with an empty argument.
    with open(sys.argv[2]) as argfile:
        stripped = [oneline.strip() for oneline in argfile]
    arglist = [onearg for onearg in stripped if onearg]

    # Identical for every job, so computed once outside the loop.
    extraargs = sys.argv[3:]

    if Util.promptOkay("Going to run %s for %s?" % (fullclass, ",".join(arglist))):

        for onearg in arglist:
            hadJavaCall.runHadoopCall(jclass, [onearg] + extraargs)
Esempio n. 11
0
#!/usr/bin/python

#xxxxxxxxxx

import re, os, sys, fileinput
			
sys.path.append("/local/src/python/util")
import hadJavaCall

if __name__ == "__main__":

    # Runs the HadoopMode backfill for a bunch of argument days.
    # Days are read from standard input, one per line; duplicates collapse
    # because they are gathered into a set.
    dayset = set(oneline.strip() for oneline in sys.stdin)

    for oneday in dayset:
        hadJavaCall.runHadoopCall("HadoopMode", [oneday])
Esempio n. 12
0
#!/usr/bin/python

#xxxxxxxxxx

import re, os, sys, fileinput

sys.path.append("/local/src/python/util")
import hadJavaCall

if __name__ == "__main__":

    # Runs the HadoopMode backfill for a bunch of argument days.
    # Each line of standard input is one day; surrounding whitespace is
    # stripped and repeated days are launched only once.
    daylines = [oneline.strip() for oneline in sys.stdin]

    for oneday in set(daylines):
        hadJavaCall.runHadoopCall("HadoopMode", [oneday])