Ejemplo n.º 1
0
    hadcall = "hadoop fs -cat %s | gunzip | %s > %s" % (filename, javacall,
                                                        batchfile)
    #print hadcall
    os.system(hadcall)


if __name__ == "__main__":

    if not len(sys.argv) == 3:
        print "Usage BatchSort.py daycode a/b/c"
        sys.exit(1)

    daycode = sys.argv[1]
    if "yest" == daycode:
        daycode = SynchUtil.get_yesterday()

    (proc_id, num_proc, num_batch) = [int(x) for x in sys.argv[2].split("/")]
    if int(proc_id) == 0:
        setupManiFile(daycode)

    # TODO: don't use temp file, instead hadoop cat into another program
    # that does the scrubbing, and direct output to the batch file.
    print "proc=%d, num=%d, nbatch=%d" % (proc_id, num_proc, num_batch)

    manimap = getManiData(daycode)

    # TODO: prefilter files so that we can print "finished with file 52/652..."
    targlist = [
        onefile for onefile in manimap
        if ((hash(onefile) % num_batch) % num_proc) == proc_id
Ejemplo n.º 2
0
#!/usr/bin/python

###################################################
# Just a wrapper for the two SliceInterest jobs
# Want these to run back to back, hard to do without a wrapper
###################################################

import os, sys
import SynchUtil

def runCall(dmclass, daycode):
	"""Run one com.adnetik.data_management Hadoop job and report the outcome.

	Builds a ``hadoop jar /mnt/jars/adnetik.jar`` command line for the class
	named ``dmclass``, passing ``daycode`` as its single argument, echoes the
	command, and runs it through the shell with os.system.

	Returns the status value reported by os.system (0 means the command
	succeeded). Callers that ignore the return value behave as before.
	"""
	hadcall = "hadoop jar /mnt/jars/adnetik.jar com.adnetik.data_management.%s %s" % (dmclass, daycode)
	print("\nHadoop call is : \n\t%s" % (hadcall))

	status = os.system(hadcall)

	# Surface failures explicitly; previously the status was discarded and a
	# failed job looked the same as a successful one in the output.
	if status != 0:
		print("\nWarning: %s returned non-zero status %s" % (dmclass, status))

	print("\nFinished with %s" % (dmclass))

	return status
	
if __name__ == "__main__":

	# No command-line arguments are read: the day code always comes from
	# SynchUtil.get_yesterday().
	daycode = SynchUtil.get_yesterday()

	# Run the two SliceInterest jobs back to back, in this fixed order.
	for dmclass in ("SliceInterestActivity", "SliceInterestSecond"):
		runCall(dmclass, daycode)

	
Ejemplo n.º 3
0
	os.system(hadcall)
	print "... done"

if __name__ == "__main__":

	# The single argument is one of:
	#   - a path ending in ".txt": a file with one day code per line
	#   - the literal "yest": resolved via SynchUtil.get_yesterday()
	#   - anything else: used directly as the day code
	if len(sys.argv) < 2:
		print("Usage UploadExelate.py <daycode|daylist.txt>")
		sys.exit(1)

	singarg = sys.argv[1]
	if singarg.endswith(".txt"):
		# Read the list inside a with-block so the file handle is closed
		# promptly instead of being left to the garbage collector.
		with open(singarg) as dayfile:
			stripped = [oneday.strip() for oneday in dayfile]
		# Skip blank lines (e.g. a trailing newline at the end of the file)
		# so that no empty day code is processed.
		daylist = [oneday for oneday in stripped if oneday]
	elif singarg == "yest":
		daylist = [SynchUtil.get_yesterday()]
	else:
		daylist = [singarg]

	# For each day: fetch the file first, then push it to HDFS.
	for daycode in daylist:
		grabExelateFile(daycode)
		upload2hdfs(daycode)




	
Ejemplo n.º 4
0
            continue  # no point in continuing

        subhadls = "hadoop fs -ls %s" % (dirpath)
        subfiles = SynchUtil.sysCallResult(subhadls)

        if len(subfiles) < FILE_COUNT_WARN_CUTOFF:
            logmail.addLogLine("Warning: found only %d files for exchange %s" %
                               (len(subfiles), excname))
            warncount += 1

    return warncount


if __name__ == "__main__":
    # Check that the daily Synch Jobs ran correctly.
    # If they did not, send an admin mail.

    # Print a usage line instead of dying with an IndexError traceback when
    # the day code argument is missing.
    if len(sys.argv) < 2:
        print("Usage: %s <daycode|yest>" % (sys.argv[0]))
        sys.exit(1)

    dayarg = sys.argv[1]
    # Substring match: any argument containing "yest" selects the value of
    # SynchUtil.get_yesterday(); otherwise the argument is the day code.
    daycode = SynchUtil.get_yesterday() if "yest" in dayarg else dayarg

    logmail = SimpleMail.SimpleMail("File Synch Check for %s" % (daycode))

    # Accumulate the warning counts returned by both checks.
    warncount = 0
    for checkname in ("no_bid", "bid_all"):
        warncount += checkExchangeDirs(logmail, checkname, daycode)

    # The mail goes out only when at least one warning was counted.
    if warncount > 0:
        logmail.send2admin()
Ejemplo n.º 5
0
		subhadls = "hadoop fs -ls %s" % (dirpath)
		subfiles = SynchUtil.sysCallResult(subhadls)
		
		if len(subfiles) < FILE_COUNT_WARN_CUTOFF:
			logmail.addLogLine("Warning: found only %d files for exchange %s" % (len(subfiles), excname))
			warncount += 1
			
	return warncount
		

if __name__ == "__main__":

	# Check that the daily Synch Jobs ran correctly.
	# If they did not, send an admin mail.

	# Print a usage line instead of dying with an IndexError traceback when
	# the day code argument is missing.
	if len(sys.argv) < 2:
		print("Usage: %s <daycode|yest>" % (sys.argv[0]))
		sys.exit(1)

	dayarg = sys.argv[1]
	# Substring match: any argument containing "yest" selects the value of
	# SynchUtil.get_yesterday(); otherwise the argument is the day code.
	daycode = SynchUtil.get_yesterday() if "yest" in dayarg else dayarg

	logmail = SimpleMail.SimpleMail("File Synch Check for %s" % (daycode))

	# Accumulate the warning counts returned by both checks.
	warncount = 0
	for checkname in ("no_bid", "bid_all"):
		warncount += checkExchangeDirs(logmail, checkname, daycode)

	# The mail goes out only when at least one warning was counted.
	if warncount > 0:
		logmail.send2admin()
			
	

Ejemplo n.º 6
0
    print "... done"


if __name__ == "__main__":

    # The single argument is one of:
    #   - a path ending in ".txt": a file with one day code per line
    #   - the literal "yest": resolved via SynchUtil.get_yesterday()
    #   - anything else: used directly as the day code
    if len(sys.argv) < 2:
        print("Usage UploadExelate.py <daycode|daylist.txt>")
        sys.exit(1)

    singarg = sys.argv[1]
    if singarg.endswith(".txt"):
        # Read the list inside a with-block so the file handle is closed
        # promptly instead of being left to the garbage collector.
        with open(singarg) as dayfile:
            stripped = [oneday.strip() for oneday in dayfile]
        # Skip blank lines (e.g. a trailing newline at the end of the file)
        # so that no empty day code is processed.
        daylist = [oneday for oneday in stripped if oneday]
    elif singarg == "yest":
        daylist = [SynchUtil.get_yesterday()]
    else:
        daylist = [singarg]

    # For each day: fetch the file first, then push it to HDFS.
    for daycode in daylist:
        grabExelateFile(daycode)
        upload2hdfs(daycode)

#pc_set = set()
#for x in targlist:
#	pc_set.add(x)
#	pc_set.add(pix_comp_map[x])
#
#for onepix in pc_set: