Example #1
def classify(mapperExecPath, reducerExecPath, inputDir, outputDir, centroidsFile, testImgsJsonFile, N):
    # Resolve the full HDFS paths of the files to ship with the job, and the
    # bare file names under which the tasks will see them locally.
    centroidsHDFSFullPath = HadoopUtils.getHDFSFullPath(centroidsFile)
    centroidsFileName = centroidsFile.split("/")[-1]
    testImgsJsonHDFSFullPath = HadoopUtils.getHDFSFullPath(testImgsJsonFile)
    testImgsJsonFileName = testImgsJsonFile.split("/")[-1]

    # Build the mapper and reducer command lines for the streaming job.
    mapperCommandStr = HadoopUtils.buildExecCommandStr([mapperExecPath, centroidsFileName, testImgsJsonFileName, str(N)])
    reducerCommandStr = HadoopUtils.buildExecCommandStr([reducerExecPath, str(N)])

    # Hadoop refuses to run if the output directory already exists.
    HadoopUtils.removeHDFSDirIfExists(outputDir)

    HadoopUtils.runHadoopStreamingJob(input=inputDir,
                                      output=outputDir,
                                      mapperCommand=mapperCommandStr,
                                      reducerCommand=reducerCommandStr,
                                      filesArray=[centroidsHDFSFullPath, testImgsJsonHDFSFullPath])
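
HadoopUtils itself is not shown in these examples. For context, here is a minimal sketch of what its runHadoopStreamingJob helper might do, assuming it shells out to the Hadoop streaming jar; the signature is taken from the calls above, but the jar path and the use of subprocess are assumptions, not the module's actual implementation:

import subprocess

# Assumed location of the streaming jar; the real path varies per install.
HADOOP_STREAMING_JAR = "/usr/lib/hadoop-mapreduce/hadoop-streaming.jar"

def runHadoopStreamingJob(input, output, mapperCommand, reducerCommand,
                          filesArray=None, numReducerTasks=None):
    # Generic options (-D, -files) must precede the streaming options.
    cmd = ["hadoop", "jar", HADOOP_STREAMING_JAR]
    if numReducerTasks is not None:
        cmd += ["-D", "mapreduce.job.reduces=%d" % numReducerTasks]
    if filesArray:
        # Ship these HDFS files into every task's working directory.
        cmd += ["-files", ",".join(filesArray)]
    cmd += ["-input", input, "-output", output,
            "-mapper", mapperCommand, "-reducer", reducerCommand]
    subprocess.check_call(cmd)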
Example #2
def preprocessInput(mapperExecPath, reducerExecPath, inputDir, outputDir, centroidsFile):
    # Ship the centroids file with the job; the mapper opens it by bare name.
    centroidsHDFSFullPath = HadoopUtils.getHDFSFullPath(centroidsFile)
    centroidsFileName = centroidsFile.split("/")[-1]
    mapperCommandStr = HadoopUtils.buildExecCommandStr([mapperExecPath, centroidsFileName, str(2)])

    # Hadoop refuses to run if the output directory already exists.
    HadoopUtils.removeHDFSDirIfExists(outputDir)

    # A single reducer task so all histograms land in one part-00000 file.
    HadoopUtils.runHadoopStreamingJob(input=inputDir,
                                      output=outputDir,
                                      mapperCommand=mapperCommandStr,
                                      numReducerTasks=1,
                                      reducerCommand=reducerExecPath,
                                      filesArray=[centroidsHDFSFullPath])
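
Both jobs also lean on two small helpers whose behavior can only be inferred from usage: getHDFSFullPath appears to qualify a path into a full HDFS URI for the -files option, and buildExecCommandStr joins an executable path with its arguments into a single command string. A sketch under those assumptions (the namenode URI is a placeholder):

# Placeholder namenode URI; the real value would come from configuration.
HDFS_URI = "hdfs://namenode:8020"

def getHDFSFullPath(hdfsPath):
    # Qualify "/foo/bar.txt" into "hdfs://namenode:8020/foo/bar.txt".
    return HDFS_URI + "/" + hdfsPath.lstrip("/")

def buildExecCommandStr(parts):
    # Build a "mapper.py centroids.txt 2" style string for -mapper/-reducer.
    return " ".join(parts)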
Example #3
def storeInputIntoHDFS(inputLocalFilePath, inputFolder):
    # Upload the local test-images JSON file into the HDFS input folder.
    HadoopUtils.put(inputLocalFilePath, inputFolder)
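
HadoopUtils.put presumably wraps the standard hdfs dfs -put upload. A one-line sketch under that assumption:

import subprocess

def put(localFilePath, hdfsDir):
    # Copy a local file into an HDFS directory via the hdfs CLI.
    subprocess.check_call(["hdfs", "dfs", "-put", localFilePath, hdfsDir])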
Example #4
def readConfigs(confFilePath):
    # Load the JSON configuration into the module-level `configs` dict.
    with open(confFilePath) as config_file:
        global configs
        configs = json.load(config_file)

if len(sys.argv) < MIN_NUM_ARGS:
    print "Wrong number of arguments:", len(sys.argv)
    printUsage()
    sys.exit(1)

testImagesJsonFilePath = sys.argv[1]
confFilePath = sys.argv[2]
outputFilePath = sys.argv[3]

readConfigs(confFilePath)
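
The configuration file is plain JSON whose keys are the values of the *_TAG and *_PATH constants, which this snippet does not show. A hypothetical example of its shape, with invented key names and paths:

# config.json (hypothetical key names and paths):
# {
#     "hdfsBaseFolder": "/user/retrieval",
#     "preprocessorMapperExecPath": "bin/preprocessor_mapper",
#     "preprocessorReducerExecPath": "bin/preprocessor_reducer",
#     "classifierMapperExecPath": "bin/classifier_mapper",
#     "classifierReducerExecPath": "bin/classifier_reducer",
#     "centroidsHdfsPath": "data/centroids.txt",
#     "n": 10
# }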

# Recreate the HDFS input folder and upload the test-images JSON into it.
hdfsInputFolder = configs[HDFS_BASE_FOLDER_TAG] + "/" + INPUT_FOLDER_NAME
HadoopUtils.removeHDFSDirIfExists(hdfsInputFolder)
HadoopUtils.mkdir(hdfsInputFolder)

storeInputIntoHDFS(testImagesJsonFilePath, hdfsInputFolder)

# Preprocess the images and store the resulting histograms on HDFS.
testHistsFolder = configs[HDFS_BASE_FOLDER_TAG] + "/" + TEST_HISTS_FOLDER_NAME
preprocessInput(configs[PREPROCESSOR_MAPPER_EXEC_PATH], configs[PREPROCESSOR_REDUCER_EXEC_PATH], hdfsInputFolder, testHistsFolder, configs[CENTROIDS_HDFS_PATH])

# Run the image-retrieval job. The single preprocessing reducer wrote all
# test histograms to part-00000, which is shipped to the classifier tasks.
trainHistsFolder = configs[HDFS_BASE_FOLDER_TAG] + "/" + TRAIN_HISTS_FOLDER_NAME
resultsFolder = configs[HDFS_BASE_FOLDER_TAG] + "/" + RESULTS_FOLDER_NAME
classify(configs[CLASSIFIER_MAPPER_EXEC_PATH], configs[CLASSIFIER_REDUCER_EXEC_PATH], trainHistsFolder, resultsFolder, configs[CENTROIDS_HDFS_PATH], testHistsFolder + "/part-00000", configs[N_TAG])

# Download and merge the result part files from HDFS into one local file.
HadoopUtils.getAndMergeFilesFromHDFS(resultsFolder + "/part-*", outputFilePath)
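
getAndMergeFilesFromHDFS likely corresponds to hdfs dfs -getmerge, which concatenates matching part files into a single local file. A sketch under that assumption:

import subprocess

def getAndMergeFilesFromHDFS(hdfsPathPattern, localFilePath):
    # Merge all matching HDFS part files into one local result file.
    subprocess.check_call(["hdfs", "dfs", "-getmerge", hdfsPathPattern, localFilePath])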