Ejemplo n.º 1
0
def setupManiFile(daycode):
	"""Build the manifest file for daycode.

	Merges the file -> size maps returned by
	LogFileDataSize.getHdfsPathList for every exchange / big-log-type
	pair, prints a summary per pair, and writes one "file<TAB>size" line
	per entry to getManiFilePath(daycode).
	"""
	batchdir = getBatchDir(daycode)

	# Create the directory in-process instead of shelling out to "mkdir":
	# no quoting problems, and missing parent directories are created too.
	if not os.path.exists(batchdir):
		os.makedirs(batchdir)

	manimap = {}

	for onex in SynchUtil.getExchanges():
		for logtype in SynchUtil.getBigLogList():
			sizemap = LogFileDataSize.getHdfsPathList(onex, logtype, daycode)
			manimap.update(sizemap)

			# Floor division: whole gigabytes on Python 2 and 3 alike.
			sizegb = sum(sizemap.values()) // 1000000000

			print("Found %d files, %d gb for %s %s %s" % (len(sizemap), sizegb, onex, logtype, daycode))

	# The with-block closes the manifest even if a write fails.
	with open(getManiFilePath(daycode), 'w') as fhandle:
		for onefile in manimap:
			fhandle.write(onefile + "\t" + str(manimap[onefile]) + "\n")

	totalgb = sum(manimap.values()) // 1000000000
	print("Finished building manifest, %d total files and %d gb size" % (len(manimap), totalgb))
Ejemplo n.º 2
0
def setupManiFile(daycode):
    """Build the manifest file for daycode.

    Merges the file -> size maps returned by
    LogFileDataSize.getHdfsPathList for every exchange / big-log-type
    pair, prints a summary per pair, and writes one "file<TAB>size" line
    per entry to getManiFilePath(daycode).
    """
    batchdir = getBatchDir(daycode)

    # Create the directory in-process instead of shelling out to "mkdir":
    # no quoting problems, and missing parent directories are created too.
    if not os.path.exists(batchdir):
        os.makedirs(batchdir)

    manimap = {}

    for onex in SynchUtil.getExchanges():
        for logtype in SynchUtil.getBigLogList():
            sizemap = LogFileDataSize.getHdfsPathList(onex, logtype, daycode)
            manimap.update(sizemap)

            # Floor division: whole gigabytes on Python 2 and 3 alike.
            sizegb = sum(sizemap.values()) // 1000000000

            print("Found %d files, %d gb for %s %s %s" % (
                len(sizemap), sizegb, onex, logtype, daycode))

    # The with-block closes the manifest even if a write fails.
    with open(getManiFilePath(daycode), 'w') as fhandle:
        for onefile in manimap:
            fhandle.write(onefile + "\t" + str(manimap[onefile]) + "\n")

    totalgb = sum(manimap.values()) // 1000000000
    print("Finished building manifest, %d total files and %d gb size" % (
        len(manimap), totalgb))
Ejemplo n.º 3
0
def checkExchangeDirs(logmail, logtype, daycode):
	"""Check the HDFS exchange directories for logtype / daycode.

	For every exchange not in noDataOkaySet(), adds a warning line to
	logmail when its directory does not show up in the listing of
	/data/<logtype>/<daycode>/, or when the listing of the directory
	itself yields fewer than FILE_COUNT_WARN_CUTOFF result lines.

	Returns the number of warnings added.
	"""
	hadlines = SynchUtil.sysCallResult("hadoop fs -ls /data/%s/%s/" % (logtype, daycode))

	# Loop-invariant: fetch the exempt exchanges once, not per exchange.
	okayset = noDataOkaySet()
	warncount = 0

	for excname in SynchUtil.getExchanges():

		if excname in okayset:
			continue

		dirpath = "/data/%s/%s/%s" % (logtype, daycode, excname)

		# NOTE(review): this is a substring match, so an exchange whose name
		# is a prefix of another's would also match the longer directory.
		# Confirm exchange names are prefix-free.
		if not any(dirpath in hline for hline in hadlines):
			logmail.addLogLine("Warning: directory not found: %s" % (dirpath))
			warncount += 1
			continue  # nothing to list if the directory is missing

		subfiles = SynchUtil.sysCallResult("hadoop fs -ls %s" % (dirpath))

		if len(subfiles) < FILE_COUNT_WARN_CUTOFF:
			logmail.addLogLine("Warning: found only %d files for exchange %s" % (len(subfiles), excname))
			warncount += 1

	return warncount
Ejemplo n.º 4
0
def checkExchangeDirs(logmail, logtype, daycode):
    """Check the HDFS exchange directories for logtype / daycode.

    For every exchange not in noDataOkaySet(), adds a warning line to
    logmail when its directory does not show up in the listing of
    /data/<logtype>/<daycode>/, or when the listing of the directory
    itself yields fewer than FILE_COUNT_WARN_CUTOFF result lines.

    Returns the number of warnings added.
    """
    hadlines = SynchUtil.sysCallResult(
        "hadoop fs -ls /data/%s/%s/" % (logtype, daycode))

    # Loop-invariant: fetch the exempt exchanges once, not per exchange.
    okayset = noDataOkaySet()
    warncount = 0

    for excname in SynchUtil.getExchanges():

        if excname in okayset:
            continue

        dirpath = "/data/%s/%s/%s" % (logtype, daycode, excname)

        # NOTE(review): this is a substring match, so an exchange whose name
        # is a prefix of another's would also match the longer directory.
        # Confirm exchange names are prefix-free.
        if not any(dirpath in hline for hline in hadlines):
            logmail.addLogLine("Warning: directory not found: %s" % (dirpath))
            warncount += 1
            continue  # nothing to list if the directory is missing

        subfiles = SynchUtil.sysCallResult("hadoop fs -ls %s" % (dirpath))

        if len(subfiles) < FILE_COUNT_WARN_CUTOFF:
            logmail.addLogLine("Warning: found only %d files for exchange %s" %
                               (len(subfiles), excname))
            warncount += 1

    return warncount
Ejemplo n.º 5
0
def addOneMani(exchange, logtype, daycode, usepref):

    SynchUtil.writeManiFile(exchange, logtype, daycode, usepref)

    manipath = SynchUtil.getManiPath(exchange, logtype, daycode)

    for line in open(manipath):
        print line,
Ejemplo n.º 6
0
def addOneMani(exchange, logtype, daycode, usepref):
	
	SynchUtil.writeManiFile(exchange, logtype, daycode, usepref)
	
	manipath = SynchUtil.getManiPath(exchange, logtype, daycode)
	
	for line in open(manipath):
		print line,
Ejemplo n.º 7
0
def runIntUpdate(exchange, logtype, daycode):
	"""Write the manifest (with sizes), run callHadoop, then remove the manifest."""
	SynchUtil.writeManiFile(exchange, logtype, daycode, writesize=True)
	callHadoop(exchange, logtype, daycode)

	# Delete the local manifest in-process rather than via a shell "rm":
	# the path is never re-parsed by a shell, so odd characters are harmless.
	manipath = SynchUtil.getManiPath(exchange, logtype, daycode)
	try:
		os.remove(manipath)
	except OSError as err:
		# Stay best-effort like the old "rm" call: report and carry on.
		print("Could not remove manifest %s: %s" % (manipath, err))
Ejemplo n.º 8
0
def runIntUpdate(exchange, logtype, daycode):
    """Write the manifest (with sizes), run callHadoop, then remove the manifest."""
    SynchUtil.writeManiFile(exchange, logtype, daycode, writesize=True)
    callHadoop(exchange, logtype, daycode)

    # Delete the local manifest in-process rather than via a shell "rm":
    # the path is never re-parsed by a shell, so odd characters are harmless.
    manipath = SynchUtil.getManiPath(exchange, logtype, daycode)
    try:
        os.remove(manipath)
    except OSError as err:
        # Stay best-effort like the old "rm" call: report and carry on.
        print("Could not remove manifest %s: %s" % (manipath, err))
Ejemplo n.º 9
0
def getS3sizeMap(adex, logtype, daycode):
    """Return a dict of bare file name -> size for the S3 keys of adex/logtype/daycode."""
    bucket = SynchUtil.s3BucketGrab()
    keylist = SynchUtil.s3KeyList(bucket, adex, logtype, daycode)

    # Key names are slash-separated; only the last component is kept.
    return dict(
        (onekey.name.rsplit('/', 1)[-1], onekey.size) for onekey in keylist)
Ejemplo n.º 10
0
def logFailureList():
	"""Run FINDER_CLASS through hadJavaCall with LZO_PATTERN and today's failure-list path."""
	# The output file name carries today's daycode.
	failpath = "/var/log/cronlogs/hdfs/lzoindexer/nolzolist_%s.txt" % (SynchUtil.get_today())

	hadJavaCall.runHadoopCall(FINDER_CLASS, [LZO_PATTERN, failpath])
Ejemplo n.º 11
0
def doSimpleSynch(adex, logtype, daycode):
	"""Copy one adex/logtype/daycode batch into HDFS with distcp.

	Steps: write a local manifest, put it on HDFS under /tmp/mani/, run
	"hadoop distcp -f" with it into /data/<logtype>/<daycode>/<adex>/,
	then remove both the HDFS copy and the local copy of the manifest.
	Each command is printed before it runs.
	"""
	print("Running SimpleSynch for adex=%s, logtype=%s, daycode=%s" % (adex, logtype, daycode))

	# Generate a manifest file, using appropriate prefix
	manipath = SynchUtil.writeManiFile(adex, logtype, daycode)
	hdfsMani = "/tmp/mani/%s" % (manipath)

	# Put the manifest file on HDFS
	putCall = "hadoop fs -put %s %s" % (manipath, hdfsMani)
	# Log the command itself; the message used to print only the HDFS path.
	print("PutCall: %s" % (putCall))
	os.system(putCall)

	# run distcp
	hdfsDir = "/data/%s/%s/%s/" % (logtype, daycode, adex)
	distCpCall = "hadoop distcp -f %s %s" % (hdfsMani, hdfsDir)
	print("DistCpCall: %s" % (distCpCall))
	os.system(distCpCall)

	# delete mani file
	hdfsRmCall = "hadoop fs -rm %s" % (hdfsMani)
	print(hdfsRmCall)
	os.system(hdfsRmCall)

	# Delete local file
	locRmCall = "rm %s" % (manipath)
	# Log the command itself; the message used to print only the path.
	print("Local rm call is %s" % (locRmCall))
	os.system(locRmCall)
Ejemplo n.º 12
0
def doSimpleSynch(adex, logtype, daycode):
    """Copy one adex/logtype/daycode batch into HDFS with distcp.

    Steps: write a local manifest, put it on HDFS under /tmp/mani/, run
    "hadoop distcp -f" with it into /data/<logtype>/<daycode>/<adex>/,
    then remove both the HDFS copy and the local copy of the manifest.
    Each command is printed before it runs.
    """
    print("Running SimpleSynch for adex=%s, logtype=%s, daycode=%s" % (
        adex, logtype, daycode))

    # Generate a manifest file, using appropriate prefix
    manipath = SynchUtil.writeManiFile(adex, logtype, daycode)
    hdfsMani = "/tmp/mani/%s" % (manipath)

    # Put the manifest file on HDFS
    putCall = "hadoop fs -put %s %s" % (manipath, hdfsMani)
    # Log the command itself; the message used to print only the HDFS path.
    print("PutCall: %s" % (putCall))
    os.system(putCall)

    # run distcp
    hdfsDir = "/data/%s/%s/%s/" % (logtype, daycode, adex)
    distCpCall = "hadoop distcp -f %s %s" % (hdfsMani, hdfsDir)
    print("DistCpCall: %s" % (distCpCall))
    os.system(distCpCall)

    # delete mani file
    hdfsRmCall = "hadoop fs -rm %s" % (hdfsMani)
    print(hdfsRmCall)
    os.system(hdfsRmCall)

    # Delete local file
    locRmCall = "rm %s" % (manipath)
    # Log the command itself; the message used to print only the path.
    print("Local rm call is %s" % (locRmCall))
    os.system(locRmCall)
Ejemplo n.º 13
0
def callHadoop(exchange, logtype, daycode):
	"""Run INT_UPDATE_CLASS from SynchUtil.JAR_PATH on the manifest of exchange/logtype/daycode."""
	# The manifest path is the only argument handed to the Java class.
	manifest = SynchUtil.getManiPath(exchange, logtype, daycode)
	jobargs = (SynchUtil.JAR_PATH, INT_UPDATE_CLASS, manifest)

	os.system("hadoop jar %s %s %s" % jobargs)
Ejemplo n.º 14
0
def getPixelLogPath(pixid, opcode):
    """Return today's log path for pixid/opcode, creating the day directory if needed.

    The path has the form /mnt/data/userindex/<today>/log_<opcode>_<pixid>.txt.
    Raises OSError if the directory cannot be created.
    """
    logdir = "/mnt/data/userindex/%s/" % (SynchUtil.get_today())
    if not os.path.exists(logdir):
        # os.makedirs instead of a shelled-out "mkdir": missing parents are
        # created, and a failure is reported instead of silently ignored.
        os.makedirs(logdir)

    return "%slog_%s_%s.txt" % (logdir, opcode, pixid)
Ejemplo n.º 15
0
def callHadoop(exchange, logtype, daycode):
    """Run INT_UPDATE_CLASS from SynchUtil.JAR_PATH on the manifest of exchange/logtype/daycode."""
    # The manifest path is the only argument handed to the Java class.
    manifest = SynchUtil.getManiPath(exchange, logtype, daycode)
    jobargs = (SynchUtil.JAR_PATH, INT_UPDATE_CLASS, manifest)

    os.system("hadoop jar %s %s %s" % jobargs)
Ejemplo n.º 16
0
def cleanHdfsDirs():
	"""Recursively delete everything under HDFS_ADA_DIR after confirmation.

	Exits the process with status 1 if the prompt is declined.
	"""
	cleancall = "hadoop fs -rmr %s/*" % (HDFS_ADA_DIR)
	print("Cleaning adaclass dir with command %s" % (cleancall))

	# Guard clause: bail out unless the operator confirms.
	if not SynchUtil.promptOkay("Okay to delete? "):
		print("Okay, quitting")
		sys.exit(1)

	os.system(cleancall)
Ejemplo n.º 17
0
def getNfsSizeMap(adex, logtype, daycode):
    """Return a dict of file name -> size for the NFS directory of adex/logtype/daycode.

    Only directory entries whose name contains '.gz' are included.
    """
    nfsdirpath = SynchUtil.getNfsDirPath(adex, logtype, daycode)

    sizes = {}
    for entry in os.listdir(nfsdirpath):
        if '.gz' in entry:
            sizes[entry] = os.path.getsize(nfsdirpath + "/" + entry)

    return sizes
Ejemplo n.º 18
0
def createHdfsDirs(pixset):
	"""Create a directory under HDFS_ADA_DIR for each entry of pixset not already listed there."""
	lslines = SynchUtil.sysCallResult("hadoop fs -ls %s" % HDFS_ADA_DIR)

	for onepix in pixset:
		pixpref = pixprefFromDir(onepix)

		# A directory counts as present when its prefix occurs in any listing line.
		if any(pixpref in oneline for oneline in lslines):
			print("Found directory %s" % (pixpref))
			continue

		hadmkdir = "hadoop fs -mkdir %s/%s" % (HDFS_ADA_DIR, pixpref)
		print("Mkdir call is %s" % (hadmkdir))
		os.system(hadmkdir)
Ejemplo n.º 19
0
def getHDsizeMap(adex, logtype, daycode):
    """Return a dict of file name -> size parsed from "hadoop fs -ls /data/<logtype>/<daycode>/<adex>/".

    Only listing lines containing '.gz' are used. The whitespace-separated
    field at index 7 is taken as the path and the one at index 4 as the size.
    """
    lscall = "hadoop fs -ls /data/%s/%s/%s/" % (logtype, daycode, adex)

    sizes = {}
    for lsline in SynchUtil.sysCallResult(lscall):
        if '.gz' not in lsline:
            continue

        fields = lsline.split()
        barename = fields[7].rsplit('/', 1)[-1]
        sizes[barename] = int(fields[4])

    return sizes
Ejemplo n.º 20
0
    for missfile in misslist:
        print "File is MISSING: %s" % (missfile)

    for sizefile in sizelist:
        print "File %s \n\t has incorrect size of %d, should be %d" % (
            sizefile, bMap[sizefile], aMap[sizefile])


if __name__ == "__main__":

    if not len(sys.argv) == 4:
        print "Usage: SimpleSynch <all|adex> <big|mini|comp|logtype> <yest|daycode>"
        sys.exit(1)

    exclist = SynchUtil.getCheckExcList(sys.argv[1])
    loglist = SynchUtil.getCheckLogList(sys.argv[2])
    daylist = SynchUtil.getCheckDayList(sys.argv[3])

    # need to make sure we're running in a folder where the .mani files
    # can be written
    os.chdir('/mnt/src/cronjobs/')

    for onex in exclist:

        # DBH has no Big Data logs
        if onex == 'dbh':
            continue

        for logtype in loglist:
            for daycode in daylist:
Ejemplo n.º 21
0
def runTrackUpdate(daycode):
    """Run UPDATE_TRACK_CLASS from SynchUtil.JAR_PATH for daycode, printing the command first."""
    jobargs = (SynchUtil.JAR_PATH, UPDATE_TRACK_CLASS, daycode)
    hadoopsys = "hadoop jar %s %s %s" % jobargs

    print("Hadoop call is %s" % (hadoopsys))
    os.system(hadoopsys)


if __name__ == "__main__":
    # This is a one-time operation to copy the impression logs and update
    # the tracking file.

    exclist = SynchUtil.getCheckExcList('all')
    daylist = SynchUtil.getCheckDayList(sys.argv[1])
    logtype = 'imp'

    for daycode in daylist:
        # Upload every exchange's logs for this day first ...
        for onex in exclist:
            print("Uploading logs for %s %s %s" % (onex, logtype, daycode))
            ConcatLzoSynch.runLogSync(onex, logtype, daycode)

        # ... then index, so that the Java UpdateTrackFile step below
        # runs against indexed LZO files.
        ConcatLzoSynch.runIndexer()
        runTrackUpdate(daycode)
Ejemplo n.º 22
0
	# Got rid of all the Mani-file nonsense
	#SynchUtil.writeManiFile(exchange, logtype, daycode, writesize=True)
	callHadoop(exchange, logtype, daycode)
		
if __name__ == "__main__":

	if not len(sys.argv) == 4:
		print "Usage: ConcatLzoSynch <all|adex> <big|mini|comp|logtype> <yest|daycode>"
		sys.exit(1)

	# need to make sure we're running in a folder where we have write permissions,
	# otherwise we won't be able to write the manifest file
	os.chdir('/local/src/cronjobs/')

	exclist = SynchUtil.getCheckExcList(sys.argv[1])	
	loglist = SynchUtil.getCheckLogList(sys.argv[2])
	daylist = SynchUtil.getCheckDayList(sys.argv[3])

	idxlist = []

	for onex in exclist:
		for logtype in loglist:
			for daycode in daylist:
				
				# This is now done by Java code
				##if not SynchUtil.nfsFilesExist(onex, logtype, daycode):
				#	print "No NFS files for %s %s %s" % (onex, logtype, daycode)
				#	continue
				
				print "Running ConcatLogSynch for %s %s %s" % (onex, logtype, daycode)
Ejemplo n.º 23
0
	os.system(hadcall)
	print "... done"

if __name__ == "__main__":

	if len(sys.argv) < 2:
		print("Usage UploadExelate.py <daycode|daylist.txt>")
		sys.exit(1)

	# The single argument is a day-list file (*.txt), "yest", or a daycode.
	singarg = sys.argv[1]
	if singarg.endswith(".txt"):
		# One daycode per line. The with-block closes the file, and blank
		# lines are skipped so they do not turn into empty daycodes.
		with open(singarg) as dayfile:
			daylist = [oneday.strip() for oneday in dayfile if oneday.strip()]
	elif singarg == "yest":
		daylist = [SynchUtil.get_yesterday()]
	else:
		daylist = [singarg]

	for daycode in daylist:
		grabExelateFile(daycode)
		upload2hdfs(daycode)




	
Ejemplo n.º 24
0
	
	# TODO: change this to use LocalConf
	hadcall = "hadoop jar /local/bin/jars/adnetik.jar %s %s %s" % (dmclass, daycode, tempfile)
	print "\nHadoop call is : \n\t%s" % (hadcall)
	
	os.system(hadcall)
	
	#print "\nFinished with %s" % (dmclass)
	
if __name__ == "__main__":

	if not len(sys.argv) == 2:
		print "Usage:  BmUpdate <yest|daycode>"
		sys.exit(1)

	daylist = SynchUtil.getCheckDayList(sys.argv[1])
	daycode = daylist[0]

	# This is kind of the hacky way to do things
	sys.path.append("/local/src/python/util")
	import SimpleMail
	logmail = SimpleMail.SimpleMail("DbStageSlice")

	javalist = []
	
	# TODO: going to roll all of this into a single Java file StagingInfoManager, obviate this Python script
	#javalist.append("com.adnetik.data_management.Special2Staging")	
	javalist.append("com.adnetik.userindex.StagingInfoManager")
	#javalist.append("com.adnetik.data_management.Click2Staging")
	#javalist.append("com.adnetik.data_management.Negative2Staging")
	
Ejemplo n.º 25
0
	
	# delete manifest file
	manipath = SynchUtil.getManiPath(exchange, logtype, daycode)
	locRmCall = "rm %s" % (manipath)
	#print "Local rm call is %s" % (locRmCall)
	os.system(locRmCall)	
	

if __name__ == "__main__":

	if len(sys.argv) != 4:
		print("Usage: InterestUpdateWrapper <all|adex> <big|mini|comp|logtype> <yest|daycode|filename>")
		sys.exit(1)

	# The manifest file is written relative to the working directory, so
	# switch to a folder where we have write permissions first.
	os.chdir('/mnt/src/cronjobs/')

	exclist = SynchUtil.getCheckExcList(sys.argv[1])
	loglist = SynchUtil.getCheckLogList(sys.argv[2])
	daylist = SynchUtil.getCheckDayList(sys.argv[3])

	# Run the update for every exchange / logtype / day combination.
	for onex in exclist:
		for logtype in loglist:
			for daycode in daylist:
				runIntUpdate(onex, logtype, daycode)
	
	
	
	
Ejemplo n.º 26
0
    print "... done"


if __name__ == "__main__":

    if len(sys.argv) < 2:
        print("Usage UploadExelate.py <daycode|daylist.txt>")
        sys.exit(1)

    # The single argument is a day-list file (*.txt), "yest", or a daycode.
    singarg = sys.argv[1]
    if singarg.endswith(".txt"):
        # One daycode per line. The with-block closes the file, and blank
        # lines are skipped so they do not turn into empty daycodes.
        with open(singarg) as dayfile:
            daylist = [oneday.strip() for oneday in dayfile if oneday.strip()]
    elif singarg == "yest":
        daylist = [SynchUtil.get_yesterday()]
    else:
        daylist = [singarg]

    for daycode in daylist:
        grabExelateFile(daycode)
        upload2hdfs(daycode)

#pc_set = set()
#for x in targlist:
#	pc_set.add(x)
#	pc_set.add(pix_comp_map[x])
#
#for onepix in pc_set:
Ejemplo n.º 27
0
    # Got rid of all the Mani-file nonsense
    #SynchUtil.writeManiFile(exchange, logtype, daycode, writesize=True)
    callHadoop(exchange, logtype, daycode)


if __name__ == "__main__":

    if not len(sys.argv) == 4:
        print "Usage: ConcatLzoSynch <all|adex> <big|mini|comp|logtype> <yest|daycode>"
        sys.exit(1)

    # need to make sure we're running in a folder where we have write permissions,
    # otherwise we won't be able to write the manifest file
    os.chdir('/local/src/cronjobs/')

    exclist = SynchUtil.getCheckExcList(sys.argv[1])
    loglist = SynchUtil.getCheckLogList(sys.argv[2])
    daylist = SynchUtil.getCheckDayList(sys.argv[3])

    idxlist = []

    for onex in exclist:
        for logtype in loglist:
            for daycode in daylist:

                # This is now done by Java code
                ##if not SynchUtil.nfsFilesExist(onex, logtype, daycode):
                #	print "No NFS files for %s %s %s" % (onex, logtype, daycode)
                #	continue

                print "Running ConcatLogSynch for %s %s %s" % (onex, logtype,
Ejemplo n.º 28
0
    # need to make sure we're running in a folder where we have write permissions,
    # otherwise we won't be able to write the manifest file
    os.chdir('/mnt/src/cronjobs/')

    exclist = []
    daylist = []
    loglist = []

    # Each stdin line is expected to hold exchange, logtype and daycode
    # separated by tabs.
    for line in sys.stdin:
        fields = line.strip().split('\t')

        # Skip lines that do not carry all three fields.
        if len(fields) < 3:
            continue

        # Only the first three fields are used; slicing keeps a line with
        # extra columns from raising ValueError on unpacking.
        (excCode, logType, dayCode) = fields[:3]

        exclist.append(SynchUtil.getCheckExcList(excCode)[0])
        loglist.append(SynchUtil.getCheckLogList(logType)[0])
        daylist.append(SynchUtil.getCheckDayList(dayCode)[0])

    idxlist = []

    for (onex, logtype, daycode) in zip(exclist, loglist, daylist):
        print("Syncing %s %s %s" % (onex, logtype, daycode))
        ConcatLzoSynch.runLogSync(onex, logtype, daycode)

        # idxlist is a list, and lists have no add(): the old call raised
        # AttributeError. Append instead, skipping paths already queued so
        # each one is indexed once, in first-seen order.
        hdfspath = SynchUtil.getHdfsPath(onex, logtype, daycode)
        if hdfspath not in idxlist:
            idxlist.append(hdfspath)

    for toidx in idxlist:
        ConcatLzoSynch.runIndexer(toidx)
Ejemplo n.º 29
0
	os.system(hdfsRmCall)
	
	# Delete local file
	locRmCall = "rm %s" % (manipath)
	print "Local rm call is %s" % (manipath)
	os.system(locRmCall)
	
	
if __name__ == "__main__":

	if not len(sys.argv) == 4:
		print "Usage: SimpleSynch <all|adex> <big|mini|comp|logtype> <yest|daycode>"
		sys.exit(1)


	exclist = SynchUtil.getCheckExcList(sys.argv[1])	
	loglist = SynchUtil.getCheckLogList(sys.argv[2])
	daylist = SynchUtil.getCheckDayList(sys.argv[3])
	
	# need to make sure we're running in a folder where the .mani files
	# can be written
	os.chdir('/var/log/cronlogs/hdfs/manifiles/')

	for onex in exclist:
		for logtype in loglist:
			for daycode in daylist:
				
				if not SynchUtil.nfsFilesExist(onex, logtype, daycode):
					print "No NFS files for %s %s %s" % (onex, logtype, daycode)
					continue				
				
Ejemplo n.º 30
0
		subhadls = "hadoop fs -ls %s" % (dirpath)
		subfiles = SynchUtil.sysCallResult(subhadls)
		
		if len(subfiles) < FILE_COUNT_WARN_CUTOFF:
			logmail.addLogLine("Warning: found only %d files for exchange %s" % (len(subfiles), excname))
			warncount += 1
			
	return warncount
		

if __name__ == "__main__":

	# Check to make sure that the daily Synch Jobs ran correctly.
	# If they didn't, send an AdminMail.

	if "yest" in sys.argv[1]:
		daycode = SynchUtil.get_yesterday()
	else:
		daycode = sys.argv[1]

	logmail = SimpleMail.SimpleMail("File Synch Check for %s" % (daycode))

	# Check both log types, in this order, and total the warnings.
	warncount = sum(checkExchangeDirs(logmail, onetype, daycode) for onetype in ("no_bid", "bid_all"))

	# Mail is sent only when something looked wrong.
	if warncount > 0:
		logmail.send2admin()
			
	

Ejemplo n.º 31
0
    print hdfsRmCall
    os.system(hdfsRmCall)

    # Delete local file
    locRmCall = "rm %s" % (manipath)
    print "Local rm call is %s" % (manipath)
    os.system(locRmCall)


if __name__ == "__main__":

    if not len(sys.argv) == 4:
        print "Usage: SimpleSynch <all|adex> <big|mini|comp|logtype> <yest|daycode>"
        sys.exit(1)

    exclist = SynchUtil.getCheckExcList(sys.argv[1])
    loglist = SynchUtil.getCheckLogList(sys.argv[2])
    daylist = SynchUtil.getCheckDayList(sys.argv[3])

    # need to make sure we're running in a folder where the .mani files
    # can be written
    os.chdir('/var/log/cronlogs/hdfs/manifiles/')

    for onex in exclist:
        for logtype in loglist:
            for daycode in daylist:

                if not SynchUtil.nfsFilesExist(onex, logtype, daycode):
                    print "No NFS files for %s %s %s" % (onex, logtype,
                                                         daycode)
                    continue
Ejemplo n.º 32
0
            continue  # no point in continuing

        subhadls = "hadoop fs -ls %s" % (dirpath)
        subfiles = SynchUtil.sysCallResult(subhadls)

        if len(subfiles) < FILE_COUNT_WARN_CUTOFF:
            logmail.addLogLine("Warning: found only %d files for exchange %s" %
                               (len(subfiles), excname))
            warncount += 1

    return warncount


if __name__ == "__main__":

    # Check to make sure that the daily Synch Jobs ran correctly.
    # If they didn't, send an AdminMail.

    if "yest" in sys.argv[1]:
        daycode = SynchUtil.get_yesterday()
    else:
        daycode = sys.argv[1]

    logmail = SimpleMail.SimpleMail("File Synch Check for %s" % (daycode))

    # Check both log types, in this order, and total the warnings.
    warncount = sum(
        checkExchangeDirs(logmail, onetype, daycode)
        for onetype in ("no_bid", "bid_all"))

    # Mail is sent only when something looked wrong.
    if warncount > 0:
        logmail.send2admin()
Ejemplo n.º 33
0
#!/usr/bin/python

###################################################
# Just a wrapper for the two SliceInterest jobs
# Want these to run back to back, hard to do without a wrapper
###################################################

import os, sys
import SynchUtil

def runCall(dmclass, daycode):
	"""Run com.adnetik.data_management.<dmclass> from /mnt/jars/adnetik.jar for daycode.

	Prints the command before running it and a completion line afterwards.
	"""
	jobclass = "com.adnetik.data_management.%s" % (dmclass)
	hadcall = "hadoop jar /mnt/jars/adnetik.jar %s %s" % (jobclass, daycode)

	print("\nHadoop call is : \n\t%s" % (hadcall))
	os.system(hadcall)
	print("\nFinished with %s" % (dmclass))
	
if __name__ == "__main__":

	# No command-line argument is read: this ALWAYS runs using "yesterday".
	daycode = SynchUtil.get_yesterday()

	# The two SliceInterest jobs run back to back, in this order.
	for dmclass in ("SliceInterestActivity", "SliceInterestSecond"):
		runCall(dmclass, daycode)

	
Ejemplo n.º 34
0
	os.chdir('/mnt/src/cronjobs/')

	exclist = []
	daylist = []
	loglist = []

	# Each stdin line is expected to hold exchange, logtype and daycode
	# separated by tabs.
	for line in sys.stdin:
		fields = line.strip().split('\t')

		# Skip lines that do not carry all three fields.
		if len(fields) < 3:
			continue

		# Only the first three fields are used; slicing keeps a line with
		# extra columns from raising ValueError on unpacking.
		(excCode, logType, dayCode) = fields[:3]

		exclist.append(SynchUtil.getCheckExcList(excCode)[0])
		loglist.append(SynchUtil.getCheckLogList(logType)[0])
		daylist.append(SynchUtil.getCheckDayList(dayCode)[0])

	idxlist = []

	for (onex, logtype, daycode) in zip(exclist, loglist, daylist):
		print("Syncing %s %s %s" % (onex, logtype, daycode))
		ConcatLzoSynch.runLogSync(onex, logtype, daycode)

		# idxlist is a list, and lists have no add(): the old call raised
		# AttributeError. Append instead, skipping paths already queued so
		# each one is indexed once, in first-seen order.
		hdfspath = SynchUtil.getHdfsPath(onex, logtype, daycode)
		if hdfspath not in idxlist:
			idxlist.append(hdfspath)

	for toidx in idxlist:
		ConcatLzoSynch.runIndexer(toidx)

	
Ejemplo n.º 35
0
    hadcall = "hadoop fs -cat %s | gunzip | %s > %s" % (filename, javacall,
                                                        batchfile)
    #print hadcall
    os.system(hadcall)


if __name__ == "__main__":

    if not len(sys.argv) == 3:
        print "Usage BatchSort.py daycode a/b/c"
        sys.exit(1)

    daycode = sys.argv[1]
    if "yest" == daycode:
        daycode = SynchUtil.get_yesterday()

    (proc_id, num_proc, num_batch) = [int(x) for x in sys.argv[2].split("/")]
    if int(proc_id) == 0:
        setupManiFile(daycode)

    # TODO: don't use temp file, instead hadoop cat into another program
    # that does the scrubbing, and direct output to the batch file.
    print "proc=%d, num=%d, nbatch=%d" % (proc_id, num_proc, num_batch)

    manimap = getManiData(daycode)

    # TODO: prefilter files so that we can print "finished with file 52/652..."
    targlist = [
        onefile for onefile in manimap
        if ((hash(onefile) % num_batch) % num_proc) == proc_id
Ejemplo n.º 36
0
	for subpixdir in listset:
		
		pixpref = pixprefFromDir(subpixdir)
		hdfsdir = "/userindex/adaclass/%s/" % (pixpref)
		
		#rmcall = "hadoop fs -rmr %s
		
		hadcall = "hadoop fs -put %s%s/*.ser %s" % (locdir, subpixdir, hdfsdir)
		
		print "Upload call is %s" % (hadcall)
		os.system(hadcall)
	

if __name__ == "__main__":

	daycode = SynchUtil.get_today() if len(sys.argv) < 2 else sys.argv[1]

	print "\nCalling for daycode %s" % (daycode)
	
	locadadir = "/mnt/data/userindex/%s/" % (daycode)
	
	listset = set()
	
	for onedir in os.listdir(locadadir):
		
		if "adaclass_" in onedir:
			
			serfiles = [onefile for onefile in os.listdir(locadadir + "/" + onedir) if ".ser" in onefile]
			
			if len(serfiles) > 0:
				print "Found directory %s with %d serfiles" % (onedir, len(serfiles))
Ejemplo n.º 37
0
	hdfsdir = sys.argv[1]	
	
	ftypemap = {}
	
	for onefile in os.listdir("."):
				
		ftoks = onefile.split(".")
		
		if not len(ftoks) == 2:
			continue
			
		basename = ftoks[0]
		ftype = ftoks[1]

		ftypemap.setdefault(ftype, 0)
		ftypemap[ftype] += 1
	
	
	for ftype in ftypemap:
		print "Found %d files of type %s" % (ftypemap[ftype], ftype)

	if not SynchUtil.promptOkay("Going to upload files to %s" % (hdfsdir)):	
		sys.exit(1)
		
	for onefile in os.listdir("."):
		
		upcall = "hadoop fs -put %s %s" % (onefile, hdfsdir)
		print "Upload call is %s" % (upcall)
		os.system(upcall)
Ejemplo n.º 38
0
def runTrackUpdate(daycode):
	"""Run UPDATE_TRACK_CLASS from SynchUtil.JAR_PATH for daycode, printing the command first."""
	jobargs = (SynchUtil.JAR_PATH, UPDATE_TRACK_CLASS, daycode)
	hadoopsys = "hadoop jar %s %s %s" % jobargs

	print("Hadoop call is %s" % (hadoopsys))
	os.system(hadoopsys)
	
if __name__ == "__main__":

	# This is a one-time operation to copy the impression logs and update
	# the tracking file.

	exclist = SynchUtil.getCheckExcList('all')
	daylist = SynchUtil.getCheckDayList(sys.argv[1])
	logtype = 'imp'

	for daycode in daylist:
		# Upload every exchange's logs for this day first ...
		for onex in exclist:
			print("Uploading logs for %s %s %s" % (onex, logtype, daycode))
			ConcatLzoSynch.runLogSync(onex, logtype, daycode)

		# ... then index, so that the Java UpdateTrackFile step below
		# runs against indexed LZO files.
		ConcatLzoSynch.runIndexer()
		runTrackUpdate(daycode)