def find_xmls_with_minimal_size_of_this_cate(xmlDir, cate):
    """Copy the XMLs holding the narrowest, shortest and smallest box of ``cate``.

    Every annotation file in ``xmlDir`` is scanned.  Each time an object
    whose ``<name>`` equals ``cate`` sets a new minimum width, height or
    area, the file is copied into the module-level ``tarDir`` as
    ``<cate>_wMin.xml``, ``<cate>_hMin.xml`` or ``<cate>_aMin.xml``
    (overwriting the previous record holder).  The final minima are
    printed once all files have been processed.

    Args:
        xmlDir: Directory containing the annotation XML files.
        cate: Category name to look for.
    """
    xml_names = FILES.get_sorted_files(xmlDir)
    # Running minima start from a 9999 sentinel, so only boxes smaller
    # than that in the respective dimension are ever recorded.
    best_w = 9999
    best_h = 9999
    best_area = best_w * best_h
    for xml_name in xml_names:
        src = os.path.join(xmlDir, xml_name)
        w_dst = os.path.join(tarDir, '{}_wMin.xml'.format(cate))
        h_dst = os.path.join(tarDir, '{}_hMin.xml'.format(cate))
        a_dst = os.path.join(tarDir, '{}_aMin.xml'.format(cate))
        soup = BeautifulSoup(TXT.read_txt(src), "xml")
        for obj in soup.find_all('object'):
            if obj.find('name').get_text() != cate:
                continue
            # `find` yields the first matching descendant of the object.
            xmin, xmax, ymin, ymax = (
                int(obj.find(tag).get_text())
                for tag in ('xmin', 'xmax', 'ymin', 'ymax')
            )
            box_w = xmax - xmin
            box_h = ymax - ymin
            box_area = box_w * box_h
            if box_w < best_w:
                best_w = box_w
                shutil.copy(src, w_dst)
            if box_h < best_h:
                best_h = box_h
                shutil.copy(src, h_dst)
            if box_area < best_area:
                best_area = box_area
                shutil.copy(src, a_dst)
    print("cate: {} MinW={}, MinH={}, MinA={}".format(cate, best_w, best_h,
                                                      best_area))
def find_most_cates_with_cate(xmlDir, cate, savedCates):
    """Copy the XML that contains ``cate`` together with the most other categories.

    Each annotation file in ``xmlDir`` is parsed and its distinct object
    names are collected.  Among the files that contain ``cate``, the first
    one with the largest number of distinct names is copied into the
    module-level ``tarDir`` and its names are appended to ``savedCates``.

    When no file contains ``cate`` nothing is copied, ``savedCates`` is
    left untouched and a message is printed (previously this case raised
    ``NameError`` because the "best" variables were never assigned).

    Args:
        xmlDir: Directory containing the annotation XML files.
        cate: Category name that the chosen file must contain.
        savedCates: List that receives the category names of the chosen
            file; it is mutated in place.
    """
    best_count = 0
    best_src = None
    best_dst = None
    best_names = []
    for xml in FILES.get_sorted_files(xmlDir):
        counts = {}
        src = os.path.join(xmlDir, xml)
        dst = os.path.join(tarDir, xml)
        soup = BeautifulSoup(TXT.read_txt(src), "xml")
        for obj in soup.find_all('object'):
            # `find` only returns the first matching element.
            name = obj.find('name').get_text()
            counts[name] = counts.get(name, 0) + 1
        # Strict '>' keeps the earliest file on ties.
        if cate in counts and len(counts) > best_count:
            best_count = len(counts)
            best_src = src
            best_dst = dst
            # Snapshot the names so later iterations cannot affect them.
            best_names = list(counts)
    if best_src is None:
        print("no xml in {} contains cate {}".format(xmlDir, cate))
        return
    print("maxXml={}, maxTar={}".format(best_src, best_dst))
    shutil.copy(best_src, best_dst)
    savedCates.extend(best_names)
def remove_img():
    """Delete every image in ``imgDir`` that has no annotation in ``xmlDir``.

    Relies on the module-level names ``imgDir``, ``xmlDir``, ``imgType``
    and ``xmlType``.  For each image file name, ``imgType`` is replaced by
    ``xmlType`` to obtain the expected annotation name; when that name is
    not present in ``xmlDir`` the image is removed from disk.  The number
    of removed images is printed at the end.
    """
    img_names = list(FILES.get_sorted_files(imgDir))
    # A set makes each membership test O(1) instead of scanning the whole
    # annotation listing once per image.
    xml_names = set(FILES.get_sorted_files(xmlDir))
    removed = 0
    for img in tqdm(img_names):
        expected_xml = img.replace(imgType, xmlType)
        if expected_xml in xml_names:
            continue
        print('del %s' % img)
        os.remove(os.path.join(imgDir, img))
        removed += 1
    print("remove {} imgs.\n".format(removed))
def sample_random(sampleDir, tarDir, remainNum):
    """Move up to ``remainNum`` randomly chosen files into ``tarDir``.

    The listing of ``sampleDir`` is shuffled in place and files are moved
    one by one until ``remainNum`` files have been transferred or the
    listing is exhausted.

    Args:
        sampleDir: Directory to draw files from.
        tarDir: Directory that receives the sampled files.
        remainNum: Maximum number of files to move.
    """
    candidates = FILES.get_sorted_files(sampleDir)
    random.shuffle(candidates)
    moved = 0
    for name in candidates:
        if moved >= remainNum:
            # Quota reached; the remaining files stay where they are.
            break
        src = os.path.join(sampleDir, name)
        dst = os.path.join(tarDir, name)
        shutil.move(src, dst)
        moved += 1
def remove_xml():
    """Delete every annotation in ``xmlDir`` that has no image in ``imgDir``.

    Relies on the module-level names ``imgDir``, ``xmlDir``, ``imgType``
    and ``xmlType``.  For each annotation file name, ``xmlType`` is
    replaced by ``imgType`` to obtain the expected image name; when that
    name is not present in ``imgDir`` the annotation is removed from disk.
    The number of removed annotations is printed at the end.
    """
    # A set makes each membership test O(1) instead of scanning the whole
    # image listing once per annotation.
    img_names = set(FILES.get_sorted_files(imgDir))
    xml_names = list(FILES.get_sorted_files(xmlDir))
    removed = 0
    for xml in tqdm(xml_names):
        expected_img = xml.replace(xmlType, imgType)
        if expected_img in img_names:
            continue
        print('del %s' % xml)
        os.remove(os.path.join(xmlDir, xml))
        removed += 1
    print("remove {} xmls.\n".format(removed))
def copy_files_refer_dir(FileDir, FileForm, ReferDir, xmlTar):
    """Copy from ``FileDir`` the files whose stems appear in ``ReferDir``.

    For every entry of ``ReferDir`` the text before its first ``.`` is
    combined with ``FileForm`` to form a file name; that file is copied
    from ``FileDir`` to ``xmlTar``.  Copying is best effort: a file that
    is missing or cannot be copied is skipped.

    Args:
        FileDir: Directory holding the files to copy.
        FileForm: Suffix (including the dot, e.g. ``'.jpg'``) appended to
            each reference stem.
        ReferDir: Directory whose file names select what gets copied.
        xmlTar: Destination directory.
    """
    for refer in FILES.get_sorted_files(ReferDir):
        # NOTE(review): split('.')[0] cuts at the FIRST dot, so a name
        # such as "a.b.xml" maps to "a" + FileForm -- confirm that no
        # reference names contain extra dots.
        file = refer.split('.')[0] + FileForm
        sourPath = os.path.join(FileDir, file)
        tarPath = os.path.join(xmlTar, file)
        try:
            shutil.copy(sourPath, tarPath)
        except (IOError, OSError, shutil.Error):
            # Only file-system failures are tolerated; anything else
            # (KeyboardInterrupt, programming errors) now propagates.
            continue
def find_all_cates(xmlDir):
    """Return the distinct object names found in the XMLs of ``xmlDir``.

    Args:
        xmlDir: Directory containing the annotation XML files.

    Returns:
        The ``keys()`` view of a dict mapping each object name to its
        number of occurrences, in order of first appearance.
    """
    occurrences = {}
    for xml_name in FILES.get_sorted_files(xmlDir):
        xml_path = os.path.join(xmlDir, xml_name)
        soup = BeautifulSoup(TXT.read_txt(xml_path), "xml")
        for obj in soup.find_all('object'):
            # `find` only returns the first matching element.
            label = obj.find('name').get_text()
            occurrences[label] = occurrences.get(label, 0) + 1
    return occurrences.keys()
def move_xmls_with_cates(xmlDir, cates):
    """Move every XML that contains at least one object named in ``cates``.

    Matching files are moved from ``xmlDir`` into the module-level
    ``tarDir``; a file is moved as soon as its first matching object is
    found.

    Args:
        xmlDir: Directory containing the annotation XML files.
        cates: Container of category names to match.
    """
    for xml_name in FILES.get_sorted_files(xmlDir):
        src = os.path.join(xmlDir, xml_name)
        dst = os.path.join(tarDir, xml_name)
        soup = BeautifulSoup(TXT.read_txt(src), "xml")
        objects = soup.find_all('object')
        # any() stops at the first hit, like the original loop-and-break.
        if any(obj.find('name').get_text() in cates for obj in objects):
            shutil.move(src, dst)
def sample_files(sampleDir, tarDir, remainNum):
    """Copy an evenly spaced sample of about ``remainNum`` files to ``tarDir``.

    The sorted listing of ``sampleDir`` is walked with a stride of
    ``len(files) / remainNum``: the first file is always taken, and another
    file is taken each time the running position passes the next multiple
    of the stride.

    A non-positive ``remainNum`` copies nothing.  Previously ``0`` raised
    ``ZeroDivisionError`` and a negative value copied every file.

    Args:
        sampleDir: Directory to sample from.
        tarDir: Directory that receives the copies.
        remainNum: Approximate number of files to keep.
    """
    if remainNum <= 0:
        return
    names = FILES.get_sorted_files(sampleDir)
    stride = len(names) / remainNum
    # `position` leads `next_pick` by less than one stride at the start,
    # so the very first file is selected.
    next_pick = stride
    position = int(stride) + 1
    for name in names:
        if position - next_pick > 0:
            shutil.copy(os.path.join(sampleDir, name),
                        os.path.join(tarDir, name))
            next_pick += stride
        position += 1
def filter_xmls_without_cates(xmlDir, cates):
    """Copy every XML that contains none of the categories in ``cates``.

    Files from ``xmlDir`` whose objects are all named outside ``cates``
    (including files with no objects at all) are copied into the
    module-level ``tarDir``.

    Args:
        xmlDir: Directory containing the annotation XML files.
        cates: Container of category names that disqualify a file.
    """
    for xml_name in FILES.get_sorted_files(xmlDir):
        src = os.path.join(xmlDir, xml_name)
        dst = os.path.join(tarDir, xml_name)
        soup = BeautifulSoup(TXT.read_txt(src), "xml")
        objects = soup.find_all('object')
        has_listed_cate = any(
            obj.find('name').get_text() in cates for obj in objects)
        if not has_listed_cate:
            shutil.copy(src, dst)
def filter_short_cates(xmlDir, cates):
    """Copy XMLs that hold a ``cates`` box wider than 0.9 times its height.

    For each file in ``xmlDir`` the objects named in ``cates`` are
    inspected in document order; the first one whose ``bndbox`` width
    exceeds ``0.9 * height`` causes the file to be copied into the
    module-level ``tarDir`` and ends the scan of that file.

    Args:
        xmlDir: Directory containing the annotation XML files.
        cates: Container of category names to inspect.
    """
    for xml_name in FILES.get_sorted_files(xmlDir):
        src = os.path.join(xmlDir, xml_name)
        dst = os.path.join(tarDir, xml_name)
        soup = BeautifulSoup(TXT.read_txt(src), "xml")
        for obj in soup.find_all('object'):
            if obj.find('name').get_text() not in cates:
                continue
            box = obj.find("bndbox")
            box_w = int(box.find('xmax').text) - int(box.find('xmin').text)
            box_h = int(box.find('ymax').text) - int(box.find('ymin').text)
            if box_w > box_h * 0.9:
                shutil.copy(src, dst)
                break
def filter_lack_helmet(xmlDir):
    """Copy XMLs in which persons outnumber helmets.

    Objects named ``person`` count as persons; objects named ``red``,
    ``yellow``, ``blue``, ``white`` or ``orange`` count as helmets.  Any
    other name is reported on stdout and ignored.  Files with more persons
    than helmets are copied into the module-level ``tarDir``.

    Args:
        xmlDir: Directory containing the annotation XML files.
    """
    person_names = ['person']
    helmet_names = ['red', 'yellow', 'blue', 'white', 'orange']
    for xml_name in FILES.get_sorted_files(xmlDir):
        persons = 0
        helmets = 0
        src = os.path.join(xmlDir, xml_name)
        dst = os.path.join(tarDir, xml_name)
        soup = BeautifulSoup(TXT.read_txt(src), "xml")
        for obj in soup.find_all('object'):
            label = obj.find('name').get_text()
            if label in person_names:
                persons += 1
                continue
            if label in helmet_names:
                helmets += 1
                continue
            print("Lack of category {} set in function filter_lack_helmet".
                  format(label))
        if persons > helmets:
            shutil.copy(src, dst)
# encoding=utf8
# Remove the .jpg files in rmvDir whose stem has no matching reference
# file (.xml) in the reference directories.
import os

from basicFun import FILES

if __name__ == "__main__":
    count = 0
    rmvDir = r"/DATACENTER3/yh/detectron/caffe2/detect/factory/people_longVideoAll/"
    referDirRoot = r"/DATACENTER3/yh/detectron/caffe2/detect/factory"
    # referDirNames=["tankClose","tankNoPipe","tankPipe","terribleCover"]
    referDirNames = ["outXml"]
    referType = ".xml"
    rmvType = ".jpg"
    # referDirNames=["1","2","3"]
    allRmvs = list(FILES.get_sorted_files(rmvDir))
    for referDirName in referDirNames:
        referDir = referDirRoot + '/' + referDirName
        # Collect the reference names in a set for O(1) membership tests.
        allRefers = {
            x for x in FILES.get_sorted_files(referDir) if referType in x
        }
        for rmv in allRmvs:
            stem = rmv.split('.')[0]
            if stem + referType in allRefers:
                continue
            removePath = rmvDir + '/' + stem + rmvType
            # removePath=rmvDir+'/'+rmv
            # The target may not exist: the listing can contain non-jpg
            # files or duplicate stems, and it goes stale after the first
            # reference directory has been processed.  Skip instead of
            # crashing with FileNotFoundError.
            if not os.path.exists(removePath):
                continue
            os.remove(removePath)
            count += 1
    print("cancel {} files={}\n".format(rmvType, count))
# Copy every .jpg of dir1 into des1 under a prefixed name and, when the
# matching sequentially numbered annotation exists in dir2, copy it into
# des2 under the same prefixed stem.
import os, shutil
from basicFun import FILES
dir1 = r"/disk2/hao.yang/project/Qin/data/preProcess/tolabelImg/0501/safe_cashbox_FengShi_simulate/"
des1 = r"/disk2/hao.yang/project/Qin/data/preProcess/tolabelImg/0501/safe_cashbox_FengShi_simulate_img/"
dir2 = '/disk2/hao.yang/project/Qin/data/preProcess/tolabelImg/0501/xml/'
des2 = '/disk2/hao.yang/project/Qin/data/preProcess/tolabelImg/0501/labeled_tube_px/'
FILES.mkdir(des1)
FILES.mkdir(des2)
# Prefix prepended to every copied file name.
preTitle = 'safe_cashbox_FengShi_simulate_'
endTitle = ''
# Running index used to build the 6-digit annotation name looked up in dir2.
i = 0
filelist = FILES.get_sorted_files(dir1)
for file in filelist:
    # Substring test: any name containing ".jpg" is treated as an image.
    if ".jpg" in file:
        newfileName = '%06d' % i
        # newfileName=file
        # shutil.copy(os.path.join(dir1,file),os.path.join(des1,preTitle+newfileName+endTitle+".jpg"))
        shutil.copy(os.path.join(dir1, file),
                    os.path.join(des1, preTitle + file))
        # The annotation is located by index, not by the image's own name.
        if os.path.exists(
                os.path.join(
                    dir2, 'unload_FengShi_tube_simulate_' + newfileName +
                    endTitle + ".xml")):
            shutil.copy(
                os.path.join(
                    dir2, 'unload_FengShi_tube_simulate_' + newfileName +
                    endTitle + ".xml"),
                os.path.join(des2, preTitle + file.replace('.jpg', ".xml")))
        else:
            print(file, "no xml")
        # NOTE(review): assumed to advance once per image (inside the
        # ".jpg" branch) -- the original nesting of this statement could
        # not be recovered; confirm against the author's copy.
        i += 1
#encoding=utf-8
# Give every image of one class a copy of that class's reference XML.
import os
import shutil

from basicFun import FILES

if __name__ == "__main__":
    kind = 'door_close'
    referXml = "/DATACENTER5/hao.yang/dataRoom/Qin/safe/caotan/referXml/{}.xml".format(
        kind)
    xmlDir = "/DATACENTER5/hao.yang/dataRoom/Qin/safe/caotan/labels_door/"
    imgDir = "/DATACENTER5/hao.yang/dataRoom/Qin/safe/caotan/safe_door_classified_all/{}".format(
        kind)
    for img_name in list(FILES.get_sorted_files(imgDir)):
        # The annotation takes the image's name up to its first dot.
        stem = img_name.split('.')[0]
        shutil.copy(referXml, os.path.join(xmlDir, stem + '.xml'))
#encoding=utf-8
# Trim oversized frame directories: any sub-directory of jpgRoot holding
# more than 3000 files keeps only its first `remainNum` files (in sorted
# order); the rest are deleted.
import os
import shutil
import time

from basicFun import FILES

jpgRoot = r"/DATACENTER2/yh/dataRoom/gasStation/tank/train/sj18JunFrames"

if __name__ == "__main__":
    remainNum = 500
    start = time.time()
    for dirName in FILES.get_sub_dirs(jpgRoot):
        jpgDir = jpgRoot + '/' + dirName
        allJpgs = FILES.get_sorted_files(jpgDir)
        if len(allJpgs) <= 3000:
            # Small enough; leave this directory untouched.
            continue
        for jpg in allJpgs[remainNum:]:
            os.remove(os.path.join(jpgDir, jpg))
if os.path.exists(dirpath): difdirpath = dirpath.replace('extractFrame', 'removedSimilarityFrame') # if difdirpath[-1]=='/': # difdirpath=difdirpath[:-1]+"Unique"+str(int(SIMIBOUND*1000)) # else: # difdirpath=difdirpath+"Unique"+str(int(SIMIBOUND*1000)) if dirCount < startdir: dirCount += 1 continue if dirCount > enddir: break begin = 0 restart = 1 FILES.rm_mkdir(difdirpath) allFILES = FILES.get_sorted_files(dirpath) jpgsCount = len(allFILES) for file in allFILES: if ".jpg" in file: # basePath='/DATACENTER5/hao.yang/dataRoom/Qin/trade/zhangbabeilu/29AD13E0_1552790204_1_000061.jpg' checkPath = os.path.join(dirpath, file) # print(checkPath) if 'shgjihsjkghj' in file: restart = 1 if restart == 0: continue if begin == 0: difpath = os.path.join(difdirpath, file) shutil.copy(checkPath, difpath) basePath = checkPath begin = 1
#encoding=utf-8 'assign img task to members as sequence' import os import shutil import random # personal lib from basicFun import FILES if __name__ == "__main__": namei = 0 members = ['day_1', 'day_2', 'day_3', 'day_4'] taskDir = '/DATACENTER2/ke.cao/oil_video_Data/unload_images_dif' outRoot = taskDir + '_day' FILES.mkdir(outRoot) allImgs = FILES.get_sorted_files(taskDir) # 每人大概分配的数量 perAmount = int(len(allImgs) / len(members)) # 可能会有分不匀的情况 redundant = len(allImgs) % len(members) print('{}*{}+{}=?{}'.format(perAmount, len(members), redundant, len(allImgs))) assignCount = 0 for member in members: outDir = os.path.join(outRoot, member) FILES.mkdir(outDir) memberAmount = perAmount for img in allImgs: if assignCount < memberAmount: # print(members[namei]) desDir = os.path.join(outRoot, members[namei]) # jpg move srcPath = os.path.join(taskDir, img)
sourPath = os.path.join(FileDir, file) tarPath = os.path.join(xmlTar, file) try: shutil.copy(sourPath, tarPath) except: pass sites = ['FengShi', 'MingDeMen', 'XiWan'] for site in sites: for isle in range(1, 6): outXmlDir = "/disk2/hao.yang/project/Qin/data/imgs/isle/{}/isle{}_{}_img/".format( site, isle, site) volume = 1000 partition = 1 allFiles = FILES.get_sorted_files(outXmlDir) random.shuffle(allFiles) count = 0 for file in allFiles: if count < volume + volume * (partition - 1) * 0.2: count += 1 else: # exit() # break partition += 1 count = 0 # tar_outXmlDir='{}_partition{}'.format(outXmlDir,partition) pDir = '/disk2/hao.yang/project/Qin/data/labelTask/isles/p{}'.format( partition) FILES.mkdir(pDir) tar_outXmlDir = '/disk2/hao.yang/project/Qin/data/labelTask/isles/p{}/{}_isle{}'.format(
print(iou) if iou > 0.6: labelObj['name'] = modelObj['name'] return labelObj modelXmlDir = '/disk2/hao.yang/project/Qin/data/imgs/isle/modelxml_isle_17117_withred/' labelXmlDir = r'/disk2/hao.yang/project/Qin/data/imgs/isle/modelxml_isle_17117_havered/' # checkedDir='/disk2/hao.yang/project/Qin/data/xmls/checkout/complete' attentionNames = ['red'] confuseNames = ['yellow', 'other', 'blue'] count = 0 allChecked = [] # allChecked=[x for x in FILES.get_sorted_files(checkedDir) if ".xml" in x] allXmls = [ x for x in FILES.get_sorted_files(modelXmlDir) if ".xml" in x and x not in allChecked ] for xml in allXmls: labelXmlPath = os.path.join(labelXmlDir, xml) modelXmlPath = os.path.join(modelXmlDir, xml) if os.path.exists(labelXmlPath): labelObjs = XML.read_objects(labelXmlPath) modelObjs = XML.read_objects(modelXmlPath) newObjs = [] for labelObj in labelObjs: if labelObj['name'] in confuseNames: newObjs.append(fuse(labelObj, modelObjs)) else: newObjs.append(labelObj) tree = ET.ElementTree(file=labelXmlPath)
file = refer.split('.')[0] + FileForm sourPath = os.path.join(FileDir, file) tarPath = os.path.join(xmlTar, file) try: shutil.copy(sourPath, tarPath) except: pass xmlDir = r"E:\factory\voc\add_data\summary\rawImg\Tank_jl" jpgDir = r"E:\research\lackLabel\voc\img4206" xmlTar = r"E:\factory\voc\add_data\summary\rawImg\Tank_jl_lite" jpgTar = r"E:\research\lackLabel\voc\img2103_1" remainNum = 70 FILES.rm_mkdir(xmlTar) # FILES.rm_mkdir(jpgTar) allFiles = FILES.get_sorted_files(xmlDir) filesCount = len(allFiles) REMRATE = filesCount / remainNum remRate = REMRATE count = int(REMRATE) + 1 fileNum = 0 for file in allFiles: if count - remRate > 0: filePath = xmlDir + '/' + file tarPath = xmlTar + '/' + file shutil.copy(filePath, tarPath) fileNum += 1 remRate += REMRATE count += 1 # copy_files_refer_dir(jpgDir,'.jpg',xmlTar,jpgTar)
if imgDir[-1] == '/': errorDir = imgDir[:-1] + "_error" difDir = imgDir[:-1] + "_dif" else: errorDir = imgDir + "_error" difDir = imgDir + "_dif" if xmlDir[-1] == '/': difXmlDir = xmlDir[:-1] + "_dif" else: difXmlDir = xmlDir + "_dif" print(xmlDir) FILES.rm_mkdir(difXmlDir) FILES.rm_mkdir(difDir) # allXmls=[x for x in FILES.get_sorted_files(xmlDir) if '.xml' in x] allXmls = [ x.replace('.jpg', '.xml') for x in FILES.get_sorted_files(imgDir) if '.jpg' in x ] begin = 0 labelNumber = 0 difNumber = 0 resume = 0 t = 0 baseTitle = 'baseTitle' print(len(allXmls)) for xmlName in tqdm(allXmls): curTitle = xmlName.split('_000')[0] if '.' not in curTitle: checkTitle = curTitle else: checkTitle = baseTitle
#encoding=utf8
# Count the boxes of each category across all XML files of a directory.
# Usage: pass the XML directory as the first command-line argument.
import os
import sys
from bs4 import BeautifulSoup
import shutil
from basicFun import FILES
from basicFun import TXT
from tqdm import tqdm

if __name__ == "__main__":
    labelDict = {}
    xmlpath = sys.argv[1]
    allxml = [x for x in FILES.get_sorted_files(xmlpath) if ".xml" in x]
    for xmlName in tqdm(allxml):
        # Parse the annotation file.
        soup = BeautifulSoup(TXT.read_txt(os.path.join(xmlpath, xmlName)),
                             "xml")
        # Every <object> element describes one box.
        for sec in soup.find_all('object'):
            # `find` only returns the first matching element.
            name = sec.find('name').get_text()
            labelDict[name] = labelDict.get(name, 0) + 1
    print(xmlpath, '\nFILES=', len(allxml), "classes=", len(labelDict))
    print(labelDict.items())
XML.write_xml(tree,xmlPath) def add_new_cates(xmlPath,referPath,addCates): addBoxes=XML.read_objects(referPath) tree = ET.ElementTree(file=xmlPath) root = tree.getroot() root=XML.del_tag(root,addCates) # print(addBoxes) for box in addBoxes: if box['name'] in addCates: root=XML.add_tag(root,box) XML.write_xml(tree,xmlPath) if __name__=="__main__": # imgDir=r"F:\gasStation\tank\regular_dataset\SanJiang_tankbox\checked\no_pipe" xmlDir=r'/disk2/hao.yang/project/Qin/data/xmls/guide/complete/guide_FMXX_8000_complete/' referRoot='/disk2/hao.yang/project/Qin/data/xmls/guide/complete/oilstation/' referDirs=FILES.get_sub_dirs(referRoot) for dirName in referDirs: referDir=os.path.join(referRoot,dirName) # referPath=r'F:\gasStation\tank\regular_dataset\SanJiang_tankbox\tank\SanJiang_refer_9.xml' # addCates=['car','motorcycle','truck','bus'] # addCates=['blue','yellow','other','security','red'] # addCates=['tank_close','tank_open'] # addCates=['cashbox_close','cashbox_open'] addCates=['gun_in','gun_out'] allXmls=[x for x in FILES.get_sorted_files(xmlDir) if ".xml" in x] for xml in allXmls: xmlPath=os.path.join(xmlDir,xml) referPath=os.path.join(referDir,xml) if os.path.exists(referPath): add_cates(xmlPath,referPath,addCates)
# sys.stdout.write('\r>> Detecting {name} {count:.2f}%'.format(name=videoPath.split('/')[-1],count=(100*float(i)/total_frames))) sys.stdout.flush() return filteredXmls xmlDir = r"/DATACENTER2/hao.yang/project/Qin/checkout/FengShi/xml1383_8kinds/" if xmlDir[-1] == '/': xmlTar = xmlDir[:-1] + '_val' else: xmlTar = xmlDir + '_val' FILES.mkdir(xmlTar) # Get leastNum boxes of the cate at least leastNum = 40 count = 0 valCount = 0 allXmls = FILES.get_sorted_files(xmlDir) # Filter xmls having a box of a cate cateName = 'pos_idle' print('Start to filter xmls containing cate {}'.format(cateName)) filteredXmls = filter_by_cate(allXmls, cateName) print('\nTotally {} xmls containing cate {}'.format(len(filteredXmls), cateName)) valRate = int(len(filteredXmls) / leastNum) for xml in filteredXmls: if count % valRate == 0: xmlPath = os.path.join(xmlDir, xml) tarPath = os.path.join(xmlTar, xml) shutil.copy(xmlPath, tarPath) valCount += 1 count += 1 print('Made validation set with {} xmls'.format(valCount))
newObjs.append(labelObj) return differ, newObjs imgDir = r'/DATACENTER4/hao.yang/project/Qin/data/imgs/safe/safe_FMXX_img/' # modelXmlDir=imgDir[:-1]+'_modelxml_baseline4' modelXmlDir = '/DATACENTER4/hao.yang/project/Qin/data/xmls/safe/modelXml_safe_FMXX/' labelXmlDir = r'/DATACENTER4/hao.yang/project/Qin/data/xmls/safe/safe_FMXX_22983_total/' badDir = '/DATACENTER4/hao.yang/project/Qin/data/xmls/safe/bad_safe_FMXX/' checkedDir = '/' FILES.rm_mkdir(badDir) nocareKinds = ['cashbox_close', 'cashbox_open', 'safe_hide', 'jug'] bigNames = [] # bigNames不可能有漏标,如果漏掉,必然是故意的,所以不算错误 count = 0 allChecked = [x for x in FILES.get_sorted_files(checkedDir) if ".xml" in x] if allChecked: allXmls = [ x for x in FILES.get_sorted_files(labelXmlDir) if ".xml" in x and x not in allChecked ] else: allXmls = [x for x in FILES.get_sorted_files(modelXmlDir) if ".xml" in x] for xml in tqdm(allXmls): labelXmlPath = os.path.join(labelXmlDir, xml) modelXmlPath = os.path.join(modelXmlDir, xml) if os.path.exists(labelXmlPath): modelObjs = XML.read_objects(modelXmlPath) labelObjs = XML.read_objects(labelXmlPath) differ = 0 differ, newObjs = combine(modelObjs, labelObjs)
'ph': 267, 'xgt': 318, 'yxy': 360, 'zt': 259, 'hyd': 276, 'hjc': 413 } taskImg = r'E:\factory\voc\helmet_img' taskXml = r'E:\factory\voc\labeled_xml_lackHel\xml' outRoot = r'E:\factory\voc\labeled_xml_lackHel_assignTask' for member in members: outDir = outRoot + '/' + member FILES.rm_mkdir(outDir) FILES.mkdir(outDir + '/img') FILES.mkdir(outDir + '/xml') allXmls = FILES.get_sorted_files(taskXml) random.shuffle(allXmls) memberI = 0 memberF = 0 for xml in allXmls: print(memberI) member = members[memberI] desDir = outRoot + '/' + member xmlPath = taskXml + '/' + xml desXml = desDir + '/xml/' + xml jpg = xml.split('.')[0] + '.jpg' jpgPath = taskImg + '/' + jpg desJpg = desDir + '/img/' + jpg # copy file shutil.copy(xmlPath, desXml) shutil.copy(jpgPath, desJpg)