def walkAndCalcReps(root_dir):
    """Compute image representations for images inside the 'ie*' directories.

    Walks ``root_dir`` recursively. Only directories whose own name starts
    with ``ie`` (case-insensitive) are handled; inside them every
    ``.jpg``/``.png``/``.tif`` file that is not yet recorded as processed is
    passed to ``getImgRep``. A non-None representation is stored in the
    representation dictionary, otherwise the returned error is stored in the
    failure dictionary. The three collections (processed keys,
    representations, failures) are loaded from their pickle files at start,
    when those exist, and are written back after each handled directory, so
    an interrupted run can continue where it stopped.

    Args:
        root_dir: Directory to traverse.

    Side effects:
        Rebinds the module globals ``processedFname``, ``repFname`` and
        ``failFname`` to paths below ``dataPath`` and (re)writes those files.

    Note:
        This file contains a later definition with the same name; if both
        stay in one module, the later one replaces this one.
    """
    global processedFname
    global repFname, failFname

    # NOTE(review): the globals are re-joined with dataPath on every call.
    # With a relative dataPath a second call would produce a doubled prefix;
    # kept as-is because other code may read these globals - confirm.
    processedFname = os.path.join(dataPath, processedFname)
    repFname = os.path.join(dataPath, repFname)
    failFname = os.path.join(dataPath, failFname)

    def _load(path, default):
        # Pickle streams are binary data: open in 'rb' and always close.
        # NOTE: pickle.load must only be used on files this tool wrote itself.
        if not os.path.isfile(path):
            return default
        with open(path, 'rb') as fh:
            return pickle.load(fh)

    def _dump(obj, path, *protocol):
        # 'wb' is required for binary pickle protocols (HIGHEST_PROTOCOL).
        with open(path, 'wb') as fh:
            pickle.dump(obj, fh, *protocol)

    processedList = _load(processedFname, [])
    repDict = _load(repFname, {})
    failDict = _load(failFname, {})

    # Set mirror of processedList for O(1) "already done?" checks; the list
    # itself is what gets persisted, so its on-disk format is unchanged.
    processedSet = set(processedList)

    # The progress total is the number of top-level entries named 'ie*'.
    nDirs = sum(1 for s in os.listdir(root_dir) if s.lower().startswith('ie'))
    iDir = 0
    ts = lt.resetTimer(nDirs, 'Going over NLI dirs!')

    for root, _subdirs, files in os.walk(root_dir, topdown=True):
        # Name of the directory itself, independent of the path separator.
        ent = os.path.basename(root)
        if not ent.lower().startswith('ie'):
            continue
        print("-----" + ent + "-----")

        for name in files:
            key = os.path.join(ent, name)
            if key in processedSet:
                continue
            fname = os.path.join(root, name)
            if not fname.endswith(('.jpg', '.png', '.tif')):
                continue
            rep, err = getImgRep(fname)
            if rep is not None:
                repDict[key] = rep
            else:
                failDict[key] = err
            processedList.append(key)
            processedSet.add(key)

        # Checkpoint after every handled directory.
        _dump(repDict, repFname, pickle.HIGHEST_PROTOCOL)
        _dump(failDict, failFname)
        _dump(processedList, processedFname)

        lt.sampleTimer(iDir, ts)
        iDir = iDir + 1
def walkAndCalcReps(root_dir):
    """Compute image representations for every image below ``root_dir``.

    Walks ``root_dir`` recursively and passes each ``.jpg``/``.png``/``.tif``
    file (extension matched case-insensitively) to ``getImgRep``. A non-None
    representation is stored under the file's path relative to ``root_dir``;
    otherwise the returned error is stored in the failure dictionary under
    the same key. Existing results are loaded from ``root_dir`` first, and
    every image is recomputed on each run (nothing is skipped).

    Files written into ``root_dir`` when the walk is finished:
        processed.pkl       - list of processed keys
        repsDBase.pkl       - representations, pickle.HIGHEST_PROTOCOL
        repsDBaseOldFmt.pkl - representations, default pickle protocol
        failDBase.pkl       - errors for images without a representation
        repsDBase.json      - representations with each value as a list

    Args:
        root_dir: Directory to traverse; also where the result files live.
    """
    imageExts = ('.jpg', '.png', '.tif')

    def _isImage(fileName):
        # One predicate for both passes, so the progress total always
        # matches the number of files that are actually processed.
        return fileName.lower().endswith(imageExts)

    def _load(path, default):
        # Pickle streams are binary data: open in 'rb' and always close.
        # NOTE: pickle.load must only be used on files this tool wrote itself.
        if not os.path.isfile(path):
            return default
        with open(path, 'rb') as fh:
            return pickle.load(fh)

    def _dump(obj, path, *protocol):
        # 'wb' is required for binary pickle protocols (HIGHEST_PROTOCOL).
        with open(path, 'wb') as fh:
            pickle.dump(obj, fh, *protocol)

    processedFname = os.path.join(root_dir, 'processed.pkl')
    repFname = os.path.join(root_dir, 'repsDBase.pkl')
    repFnameOldFmt = os.path.join(root_dir, 'repsDBaseOldFmt.pkl')
    failFname = os.path.join(root_dir, 'failDBase.pkl')
    repFnameJson = os.path.join(root_dir, 'repsDBase.json')

    processedList = _load(processedFname, [])
    repDict = _load(repFname, {})
    failDict = _load(failFname, {})

    # Files are never skipped, so without this guard every re-run would
    # append each key to processedList a second time.
    processedSet = set(processedList)

    # First walk: count the images so the progress timer knows the total.
    nFiles = 0
    for _root, _subdirs, files in os.walk(root_dir, topdown=True):
        nFiles += sum(1 for f in files if _isImage(f))

    ts = lt.resetTimer(nFiles, 'Analyzing images!', percentile=1.0)
    iFile = 0

    # Second walk: compute the representations.
    for root, _subdirs, files in os.walk(root_dir, topdown=True):
        # Path of this directory relative to root_dir ('' for root_dir
        # itself). relpath avoids the wrong result str.split gave whenever
        # the root_dir text occurred again deeper in the path.
        ent = os.path.relpath(root, root_dir)
        if ent == os.curdir:
            ent = ''
        print("-----" + ent + "-----")

        for name in files:
            if not _isImage(name):
                continue
            key = os.path.join(ent, name)
            fname = os.path.join(root, name)

            rep, err = getImgRep(fname)
            iFile += 1
            lt.sampleTimer(iFile, ts)

            if rep is not None:
                repDict[key] = rep
            else:
                failDict[key] = err
            if key not in processedSet:
                processedSet.add(key)
                processedList.append(key)

    _dump(repDict, repFname, pickle.HIGHEST_PROTOCOL)
    _dump(repDict, repFnameOldFmt)
    _dump(failDict, failFname)
    _dump(processedList, processedFname)

    # JSON copy; each representation is converted with list() first.
    repDictJson = {k: list(v) for k, v in repDict.items()}
    with open(repFnameJson, 'w') as f:
        f.write(json.dumps(repDictJson))