Code example #1
def Comic():
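    """
    Build an image-level multilabel CSV for the 'comic' subset of the
    cross-domain-detection datasets: one column per class, -1 when the class
    is absent from the image and 1 when at least one instance is annotated.
    Assumes module-level: pandas as pd, a voc_eval module providing
    parse_rec, and a path_output folder for the CSV files.
    """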
    df = None
    classes = ['bicycle','bird','car','cat','dog','person']
    sets = [('comic','train'),('comic','test')]
    for base,image_set in sets:
        path_b = '/media/gonthier/HDD/data/cross-domain-detection/datasets/comic/ImageSets/Main/%s.txt'%(image_set)
        # \s+ (not \s*): the image-set files are whitespace-separated lines
        pd_b = pd.read_csv(path_b, sep=r"\s+", names=['name_img'], dtype=str)
        for c in classes:
            pd_b[c] = -1
        print(pd_b.head(5))
        for index, row in pd_b.iterrows():
            i = row['name_img']
            path_i = '/media/gonthier/HDD/data/cross-domain-detection/datasets/comic/Annotations/%s.xml'%(i)
            read_file = voc_eval.parse_rec(path_i)
            for element in read_file:
                classe_elt = element['name']
                for c in classes:
                    if classe_elt==c:
                        pd_b.loc[pd_b['name_img']==row['name_img'],c] = 1
        pd_b['set'] = image_set
        if df is None:
            df = pd_b
        else:
            # DataFrame.append was removed in pandas 2.0; use pd.concat instead
            df = pd.concat([df, pd_b])
    # Unlike Clipart/Watercolor below, no label remapping is done here,
    # so the two CSVs are identical.
    output_name = path_output + 'comic_all' + '.csv'
    df.to_csv(output_name)

    output_name = path_output + 'comic' + '.csv'
    df.to_csv(output_name)
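All of these examples rely on voc_eval.parse_rec to read a Pascal VOC annotation file into a list of per-object dicts. The module itself is not shown here; a minimal compatible parser, assuming the standard VOC XML layout (a hypothetical reimplementation, not the project's actual file), would look like:

import xml.etree.ElementTree as ET

def parse_rec(filename):
    """Parse a Pascal VOC XML file into a list of dicts with the keys
    ('name', 'difficult', 'bbox') that the examples here rely on."""
    objects = []
    for obj in ET.parse(filename).findall('object'):
        bndbox = obj.find('bndbox')
        objects.append({
            'name': obj.find('name').text,
            'difficult': int(obj.find('difficult').text),
            'bbox': [int(bndbox.find(k).text)
                     for k in ('xmin', 'ymin', 'xmax', 'ymax')],
        })
    return objects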
Code example #2
def WriteDifficultsBoxes():
    """
    This function will mark as difficult all the tiny objects in the xml files 
    """
    size_min = 25*25  # was 20*20: at least something of size greater than 17*17
    # Path aligned with the other functions ('ImageSets/Main' rather than 'Main')
    path_b = '/media/gonthier/HDD/data/Wikidata_Paintings/WikiTenLabels/ImageSets/Main/test.txt'
    path_to_im = '/media/gonthier/HDD/data/Wikidata_Paintings/WikiTenLabels/JPEGImages/'
    pd_b = pd.read_csv(path_b, sep=r"\s+", names=['item'], dtype=str)
    for index, row in pd_b.iterrows():
        Erase = False
        i = row['item']
        path_i = path_to_im + i +'.jpg'
        im = cv2.imread(path_i)
        height, width = im.shape[:2]
        writer = Writer(path_i, width, height)
        pathxml = '/media/gonthier/HDD/data/Wikidata_Paintings/WikiTenLabels/Annotations/%s.xml'%(i)
        read_file = voc_eval.parse_rec(pathxml)
        for element in read_file:
            classe_elt = element['name']
            bbox = element['bbox']
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[2]
            ymax = bbox[3]
            area = (xmax -xmin)*(ymax-ymin)
            if area <= size_min:
                # Marked as difficult
                element['difficult']=1
                Erase = True
                writer.addObject(classe_elt, xmin, ymin, xmax, ymax, difficult=1)
            else:
                writer.addObject(classe_elt, xmin, ymin, xmax, ymax)
        if Erase:
            writer.save(annotation_path=pathxml)
            print('Modified :', i)
    return 0
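The Writer used above appears to be the one from the pascal_voc_writer package; a minimal round-trip, with hypothetical paths for illustration only, looks like:

from pascal_voc_writer import Writer  # pip install pascal-voc-writer

writer = Writer('JPEGImages/demo.jpg', width=640, height=480)
writer.addObject('person', 10, 20, 200, 220)           # regular object
writer.addObject('person', 5, 5, 20, 20, difficult=1)  # tiny object, flagged
writer.save(annotation_path='Annotations/demo.xml')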
Code example #3
def Clipart():
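    """
    Build an image-level multilabel CSV for the 'clipart' subset with the 20
    Pascal VOC classes: -1 = absent, 1 = at least one annotated instance.
    """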
    df = None
    classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
    sets = [('clipart','train'),('clipart','test')]
    for base,image_set in sets:
        path_b = '/media/gonthier/HDD/data/cross-domain-detection/datasets/clipart/ImageSets/Main/%s.txt'%(image_set)
        pd_b = pd.read_csv(path_b, sep=r"\s+", names=['name_img'], dtype=str)
        for c in classes:
            pd_b[c] = -1
        print(pd_b.head(5))
        for index, row in pd_b.iterrows():
            i = row['name_img']
            path_i = '/media/gonthier/HDD/data/cross-domain-detection/datasets/clipart/Annotations/%s.xml'%(i)
            read_file = voc_eval.parse_rec(path_i)
            for element in read_file:
                classe_elt = element['name']
                for c in classes:
                    if classe_elt==c:
                        pd_b.loc[pd_b['name_img']==row['name_img'],c] = 1
        pd_b['set'] = image_set
        if df is None:
            df = pd_b
        else:
            df = pd.concat([df, pd_b])  # DataFrame.append was removed in pandas 2.0
    output_name = path_output + 'clipart_all' + '.csv'
    print(df.iloc[[45,46,47]])
    df.to_csv(output_name)
    
    output_name = path_output + 'clipart' + '.csv'
    # Replace the 0s with 1s: difficult cases become certain positives
    for c in classes: 
        df.loc[df[c]==0,c] = 1
    print(df.iloc[[45,46,47]])
    df.to_csv(output_name)

    # Sanity check: reload the CSV as strings and inspect a few rows
    df = pd.read_csv(output_name, dtype=str)
    print(df.iloc[[45,46,47]])
Code example #4
def Watercolor():
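    """
    Build an image-level multilabel CSV for the 'watercolor' subset (6
    classes): -1 = absent, 1 = at least one annotated instance.
    """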
    df = None
    classes = ["bicycle", "bird","car", "cat", "dog", "person"]
    sets = [('watercolor','train'),('watercolor','test')]
    for base,image_set in sets:
        path_b = '/media/gonthier/HDD/data/cross-domain-detection/datasets/watercolor/ImageSets/Main/%s.txt'%(image_set)
        pd_b = pd.read_csv(path_b, sep=r"\s+", names=['name_img'], dtype=str)
        for c in classes:
            pd_b[c] = -1
        print(pd_b.head(5))
        for index, row in pd_b.iterrows():
            i = row['name_img']
            path_i = '/media/gonthier/HDD/data/cross-domain-detection/datasets/watercolor/Annotations/%s.xml'%(i)
            read_file = voc_eval.parse_rec(path_i)
            for element in read_file:
                classe_elt = element['name']
                for c in classes:
                    if classe_elt==c:
                        pd_b.loc[pd_b['name_img']==row['name_img'],c] = 1
        pd_b['set'] = image_set
        if df is None:
            df = pd_b
        else:
            df = pd.concat([df, pd_b])  # DataFrame.append was removed in pandas 2.0
    output_name = path_output + 'watercolor_all' + '.csv'
    print(df.iloc[[45,46,47]])
    df.to_csv(output_name)
    
    output_name = path_output + 'watercolor' + '.csv'
    # Replace the 0s with 1s: difficult cases become certain positives
    for c in classes: 
        df.loc[df[c]==0,c] = 1
    print(df.iloc[[45,46,47]])
    df.to_csv(output_name)

    # Sanity check: reload the CSV as strings and inspect a few rows
    df = pd.read_csv(output_name, dtype=str)
    print(df.iloc[[45,46,47]])
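Comic, Clipart, and Watercolor differ only in their dataset name and class list, so the shared logic could be factored into one helper. A sketch of such a refactoring (build_multilabel_csv is an illustrative name, not part of the original code; voc_eval and path_output are assumed as above):

import pandas as pd
import voc_eval  # project module providing parse_rec

def build_multilabel_csv(dataset, classes, path_output,
                         root='/media/gonthier/HDD/data/cross-domain-detection/datasets/'):
    """Illustrative refactoring of Comic/Clipart/Watercolor (not in the original code)."""
    frames = []
    for image_set in ('train', 'test'):
        path_b = root + '%s/ImageSets/Main/%s.txt' % (dataset, image_set)
        pd_b = pd.read_csv(path_b, sep=r"\s+", names=['name_img'], dtype=str)
        for c in classes:
            pd_b[c] = -1
        for _, row in pd_b.iterrows():
            path_i = root + '%s/Annotations/%s.xml' % (dataset, row['name_img'])
            for element in voc_eval.parse_rec(path_i):
                if element['name'] in classes:
                    pd_b.loc[pd_b['name_img'] == row['name_img'], element['name']] = 1
        pd_b['set'] = image_set
        frames.append(pd_b)
    df = pd.concat(frames)
    df.to_csv(path_output + dataset + '_all.csv')
    return df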
Code example #5
def StatsOnWikiTenLabels():
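    """
    Print label statistics for the WikiTenLabels dataset restricted to seven
    classes: per-image label counts on the train/test splits, then per-class
    instance counts (total and above size_min) from the test annotations.
    Assumes module-level: pandas as pd, numpy as np, and voc_eval.parse_rec.
    """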
    annotations_folder = '/media/gonthier/HDD/data/Wikidata_Paintings/WikiTenLabels/Annotations/'
    path_data = '/media/gonthier/HDD/output_exp/ClassifPaintings/'
    name_file = path_data + 'WikiTenLabels.csv'
    classes_a_garder = ['angel','Child_Jesus','crucifixion_of_Jesus','Mary','nudity', 'ruins','Saint_Sebastien']
    df = pd.read_csv(name_file,sep=',')
    # .copy() so that adding the 'sum' column below does not modify a view of df
    df_test = df[df['set']=='test'].copy()
    df_train = df[df['set']=='train'].copy()
    print("For test")
    df_test['sum'] = df_test[classes_a_garder].sum(axis=1)
    for i in range(len(classes_a_garder)):
        # .as_matrix was removed in pandas 1.0; use .to_numpy() instead
        print(i, len(np.where(df_test['sum'].to_numpy(dtype=int)==i)[0]))
    print("For train")
    df_train['sum'] = df_train[classes_a_garder].sum(axis=1)
    for i in range(len(classes_a_garder)):
        print(i, len(np.where(df_train['sum'].to_numpy(dtype=int)==i)[0]))
        
    print('Statistics on the test set')
    print(df_test.sum())
    print()
    print('Statistics on the train set')
    print(df_train.sum())
    
#    classes = ['angel', 'beard','capital','Child_Jesus', 'crucifixion_of_Jesus',
#                    'Mary','nudity', 'ruins','Saint_Sebastien','turban']
#    list_elt= os.listdir(annotations_folder)
#    file_test = '/media/gonthier/HDD/data/Wikidata_Paintings/WikiTenLabels/ImageSets/Main/test.txt'
#    file = open(file_test,"w") 
#    for elt in list_elt:
#        elt_wt_jpg = elt.split('.')[0]
#        str_w = elt_wt_jpg +' \n'
#        file.write(str_w) 
#    file.close()

    size_min = 25*25 # 15*15

    path_b ='/media/gonthier/HDD/data/Wikidata_Paintings/WikiTenLabels/ImageSets/Main/test.txt'
    pd_b = pd.read_csv(path_b, sep=r"\s+", names=['item'], dtype=str)
    
    dict_elts_total = {}
    dict_elts_sizemin = {}
    for c in classes_a_garder:
        pd_b[c] = 0
        dict_elts_total[c] = 0
        dict_elts_sizemin[c] = 0
    without_class = 0
    numberofIm = 0
    for index, row in pd_b.iterrows():
        numberofIm += 1
        i = row['item']
        path_i = '/media/gonthier/HDD/data/Wikidata_Paintings/WikiTenLabels/Annotations/%s.xml'%(i)
        read_file = voc_eval.parse_rec(path_i)
        with_class = False
        for element in read_file:
            classe_elt = element['name']
            bbox = element['bbox']
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[2]
            ymax = bbox[3]
            area = (xmax -xmin)*(ymax-ymin) 
#            print(area)
            
            for c in classes_a_garder:
                if classe_elt==c:  # this annotation belongs to a kept class
                    with_class = True
                    pd_b.loc[pd_b['item']==row['item'],c] = 1
                    dict_elts_total[c] += 1
                    if area > size_min:
                        dict_elts_sizemin[c] += 1
        if not(with_class):
            without_class += 1
    print('Per-class image counts on the detection-annotated test set')
    print(pd_b.sum())
    print('Number of instances per class')
    num_obj = 0
    for c in classes_a_garder:
        print(c,' : ',dict_elts_total[c])
        num_obj+=dict_elts_total[c]
    print('Total number of images', numberofIm)
    print('Total number of instances', num_obj)
    print('Number of images without any kept class', without_class)
    print('Number of instances per class with area greater than', size_min, 'pixels')
    num_obj = 0
    for c in classes_a_garder:
        print(c,' : ',dict_elts_sizemin[c])
        num_obj+=dict_elts_sizemin[c]
    print('Number of instances with area greater than', size_min, 'pixels', num_obj)
Code example #6
def Stats_and_testFile():
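    """
    Print label statistics on the WikiTenLabels test set, optionally
    regenerate the test.txt image list from the Annotations folder, and count
    per-class instances (total and above size_min) in the test annotations.
    Assumes module-level: pandas as pd, os, and voc_eval.parse_rec.
    """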
    annotations_folder = '/media/gonthier/HDD/data/Wikidata_Paintings/WikiTenLabels/Annotations/'
    path_data = '/media/gonthier/HDD/output_exp/ClassifPaintings/'
    name_file = path_data + 'WikiTenLabels.csv'
    df = pd.read_csv(name_file,sep=',')
    df_test = df[df['set']=='test']
    
    print('Statistics on the test set')
    print(df_test.sum())
    
    classes = ['angel', 'beard','capital','Child_Jesus', 'crucifixion_of_Jesus',
                    'Mary','nudity', 'ruins','Saint_Sebastien','turban']
    list_elt= os.listdir(annotations_folder)
    write_test_file = False
    if write_test_file:
        # Regenerate the test image list from the annotation file names
        # (directory order fixed to ImageSets/Main, matching the read below)
        file_test = '/media/gonthier/HDD/data/Wikidata_Paintings/WikiTenLabels/ImageSets/Main/test.txt'
        file = open(file_test,"w")
        for elt in list_elt:
            elt_wt_jpg = elt.split('.')[0]
            # no trailing space before the newline
            file.write(elt_wt_jpg + '\n')
        file.close()

    size_min = 25*25 # 15*15

    path_b ='/media/gonthier/HDD/data/Wikidata_Paintings/WikiTenLabels/ImageSets/Main/test.txt'
    pd_b = pd.read_csv(path_b, sep=r"\s+", names=['item'], dtype=str)
    
    dict_elts_total = {}
    dict_elts_sizemin = {}
    for c in classes:
        pd_b[c] = 0
        dict_elts_total[c] = 0
        dict_elts_sizemin[c] = 0
        
    for index, row in pd_b.iterrows():
        i = row['item']
        path_i = '/media/gonthier/HDD/data/Wikidata_Paintings/WikiTenLabels/Annotations/%s.xml'%(i)
        read_file = voc_eval.parse_rec(path_i)
        for element in read_file:
            classe_elt = element['name']
            bbox = element['bbox']
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[2]
            ymax = bbox[3]
            area = (xmax -xmin)*(ymax-ymin) 
#            print(area)
            for c in classes:
                if classe_elt==c:  # this annotation belongs to a kept class
                    pd_b.loc[pd_b['item']==row['item'],c] = 1
                    dict_elts_total[c] += 1
                    if area > size_min:
                        dict_elts_sizemin[c] += 1
    print('Per-class image counts on the detection-annotated test set')
    print(pd_b.sum())
    print('Number of instances per class')
    num_obj = 0
    for c in classes:
        print(c,' : ',dict_elts_total[c])
        num_obj+=dict_elts_total[c]
    print('Total number of instances', num_obj)
    print('Number of instances per class with area greater than', size_min, 'pixels')
    num_obj = 0
    for c in classes:
        print(c,' : ',dict_elts_sizemin[c])
        num_obj+=dict_elts_sizemin[c]
    print('Number of instances with area greater than', size_min, 'pixels', num_obj)
Code example #7
def CASPApaintings(copyFile=False,convertXML=False,copyIm=False):
    """
    Creation of the CASPA paintings dataset for WSOD training and evaluation
    """
    df = None
    classes = ["bear", "bird", "cat", "cow", "dog", "elephant", "horse", "sheep"]
    old_names = ["Bear", "bird", "cat", "Cattle Cow Bull", "dog", "elephant", "horse", "sheep shepherd"]
    
    old_img_folder = 'Images_Per_Class'
    default_path_imdb = '/media/gonthier/HDD2/data/'
    
    if convertXML:
        for old_name in old_names :
            ff = old_name.lower()
            ff = ff.replace(' ','_')
            for folder in ['non','realistic']:
                f = ff +'_'+folder
                old_path= default_path_imdb + 'CASPApaintings/Annotations/'+f 
                list_xml = glob.glob(old_path+'/*.xml')
                for elt in list_xml:
                    dst = default_path_imdb + 'CASPApaintings/Annotations/' + elt.split('/')[-1]
                    path_img = default_path_imdb + 'CASPApaintings/JPEGImages/' + elt.split('/')[-1].replace('xml','jpg')
                    # read the old xml file
                    old_tree=ET.parse(elt)
                    root = old_tree.getroot()
                    size = root.find('imagesize')
                    width = int(size.find('ncols').text)
                    height = int(size.find('nrows').text)
                    list_bd =  []
                    for obj in root.iter('object'):
                        cls = obj.find('name').text
                        
                        if cls not in classes:
                            continue
                        #cls_id = classes.index(cls)
                        polygon = obj.find('polygon')
                        # Take the min/max over all polygon points: robust to
                        # the point ordering in the LabelMe-style XML
                        xs = [int(pt.find('x').text) for pt in polygon.iter('pt')]
                        ys = [int(pt.find('y').text) for pt in polygon.iter('pt')]
                        xmin, ymin, xmax, ymax = min(xs), min(ys), max(xs), max(ys)
                        list_bd += [[cls, xmin, ymin, xmax, ymax]]
                    WriteNewXMLfile(dst,path_img,width,height,list_bd)
    
    if copyIm:
        # Copy the images into the new flat JPEGImages folder
        for f_name in old_names:
            for folder in ['non','realistic']:
                old_path= default_path_imdb + 'CASPApaintings/' +  old_img_folder +'/'+f_name+'/'+folder
                list_im = glob.glob(old_path+'/*.jpg')
                for elt in list_im:
                    lower_name_img = elt.split('/')[-1]
                    lower_name_img = lower_name_img.lower()
                    print(lower_name_img)
                    dst = default_path_imdb + 'CASPApaintings/JPEGImages/' + lower_name_img
                    copyfile(elt,dst)
    if copyFile:  
        # Creation of the train and set file per class
        df_test= None
        df_train = None
        for new_name,old_name in zip(classes,old_names):
            #print(new_name,old_name)
            path_c = default_path_imdb + 'CASPApaintings/' +  old_img_folder+'/' + old_name + '.txt'
            pd_c =  pd.read_csv(path_c,names=['name_img'],dtype=str, encoding='utf-8')
            pd_c_train, pd_c_test = train_test_split(pd_c,test_size=0.5, random_state=0)
            # regex=False: '.jpg' is a literal suffix, not a regex pattern
            pd_c_train['name_img'] = pd_c_train['name_img'].str.replace('.jpg','',regex=False)
            pd_c_train['name_img'] = pd_c_train['name_img'].str.lower()
            pd_c_test['name_img'] = pd_c_test['name_img'].str.replace('.jpg','',regex=False)
            pd_c_test['name_img'] = pd_c_test['name_img'].str.lower()
            path_c_train =  default_path_imdb + 'CASPApaintings/ImageSets/Main/' + new_name + '_train.txt'
            path_c_test =  default_path_imdb + 'CASPApaintings/ImageSets/Main/' + new_name + '_test.txt'
            pd_c_train.to_csv(path_c_train,header=False,index=False)
            pd_c_test.to_csv(path_c_test,header=False,index=False)
            #print(pd_c_test.head(5))
            if df_train is None:
                df_train = pd_c_train
            else:
                df_train = pd.concat([df_train, pd_c_train])
            if df_test is None:
                df_test = pd_c_test
            else:
                df_test = pd.concat([df_test, pd_c_test])
            
        df_test = df_test.drop_duplicates(subset ='name_img') 
        df_train = df_train.drop_duplicates(subset ='name_img')
        df_train = df_train['name_img']
        df_test = df_test['name_img']
        #print(df_test.head(5))
        num_test_drop = 0
        for row in df_test.values:
            name_im = default_path_imdb + 'CASPApaintings/JPEGImages/' + row +'.jpg'
            if not os.path.isfile(name_im):
                #print(row,name_im)
                df_test = df_test[df_test!=row]
                num_test_drop += 1
        num_train_drop = 0
        for row in df_train.values:
            name_im = default_path_imdb + 'CASPApaintings/JPEGImages/' + row +'.jpg'
            if not os.path.isfile(name_im):
                print(row,name_im)
                df_train = df_train[df_train!=row]
                num_train_drop += 1
        print('Number of dropped images for train:', num_train_drop, 'for test:', num_test_drop)
        path_train =default_path_imdb + 'CASPApaintings/ImageSets/Main/train.txt'
        path_test =default_path_imdb + 'CASPApaintings/ImageSets/Main/test.txt'
        df_train.to_csv(path_train,header=False,index=False)
        df_test.to_csv(path_test,header=False,index=False)
        
        # Copy of the XML files: they must be converted to Pascal VOC format,
        # which is what the convertXML branch above does
    
        
    oneNotFound = False
    list_missing_xml = []
    sets = [('CASPApaintings','train'),('CASPApaintings','test')]
    for base,image_set in sets:
        path_b = '/media/gonthier/HDD2/data/CASPApaintings/ImageSets/Main/%s.txt'%(image_set)
        pd_b = pd.read_csv(path_b,names=['name_img'],dtype=str, encoding='utf-8')
        for c in classes:
            pd_b[c] = -1
        for index, row in pd_b.iterrows():
            i = row['name_img']
            path_i = '/media/gonthier/HDD2/data/CASPApaintings/Annotations/%s.xml'%(i)
            try:
                read_file = voc_eval.parse_rec(path_i)
                for element in read_file:
                    classe_elt = element['name']
                    for c in classes:
                        if classe_elt==c:
                            pd_b.loc[pd_b['name_img']==row['name_img'],c] = 1
            except FileNotFoundError:
                print(path_i, 'not found')
                oneNotFound = True
                list_missing_xml += [i]
        pd_b['set'] = image_set
        if df is None:
            df = pd_b
        else:
            df = pd.concat([df, pd_b])  # DataFrame.append was removed in pandas 2.0
    df = df.drop_duplicates(subset='name_img')
    print('Dataset size:', len(df))
    
    print(list_missing_xml)
#    for  i in list_mising_xml:
#        name_img = default_path_imdb + 'CASPApaintings/JPEGImages/' + i +'.jpg'
#        plt.figure()
#        imreadoutput = plt.imread(name_img)
#        plt.imshow(imreadoutput)
#    input('wait')
    
    c_missing = [['bear','horse'],['bear'],['bear'],['elephant'],['horse'],['horse'],['horse'],['horse'],['bear'],['bear','sheep'],['bear','dog'],['bear'],['bear'],['bear'],['horse']]
      
    print('Modification of missing elements')
    # NB: do not shadow the top-level 'classes' list here; it is reused below
    # to write the per-class image-set files
    for missing_classes, name in zip(c_missing, list_missing_xml):
        print(name, df[df['name_img']==name]['set'])
        for c in missing_classes:
            df.loc[df['name_img']==name, c] = 1
        df.loc[df['name_img']==name, 'set'] = 'train'
    print('Dataset size:', len(df), 'train set:', len(df[df['set']=='train']), 'test set:', len(df[df['set']=='test']))
    # The missing annotations were patched by hand above, so clear the flag
    oneNotFound = False
    
    df.loc[df['name_img']=='4-cat-and-cattle','cat'] = 1
    
    if not(oneNotFound):
        output_name = path_output + 'CASPApaintings_all' + '.csv'
        df.to_csv(output_name,index=False)
        
        output_name = path_output + 'CASPApaintings' + '.csv'
        df.to_csv(output_name,index=False)
        output_name = default_path_imdb + 'CASPApaintings/ImageSets/Main/CASPApaintings.csv'
        df.to_csv(output_name,index=False)
    else:
        output_name = path_output + 'CASPApaintings_tmp' + '.csv'
        df.to_csv(output_name,index=False)
        print('XML not found, not saved')
        
    # Extract the per-set and per-class image-list files
    output_name_csv = path_output + 'CASPApaintings' + '.csv'
    print(output_name_csv)
    df_new =  pd.read_csv(output_name_csv,dtype=str)
    for image_set in ['train','test']:  # avoid shadowing the built-in 'set'
        df_set = df_new[df_new['set']==image_set]
        print(image_set, len(df_set))
        all_names = df_set['name_img'].values
        output_name = default_path_imdb + 'CASPApaintings/ImageSets/Main/'+image_set+'.txt'
        np.savetxt(output_name, all_names, delimiter='\n', fmt='%s')
        for c in classes:
            df_c = df_set[df_set[c]=='1']  # dtype=str, so compare against '1'
            all_names_c = df_c['name_img'].values
            output_name = default_path_imdb + 'CASPApaintings/ImageSets/Main/'+c+'_'+image_set+'.txt'
            np.savetxt(output_name, all_names_c, delimiter='\n', fmt='%s')
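Assuming the paths above exist, a first run would typically enable all three preprocessing steps, after which the CSV and image-set files can be rebuilt without them (illustrative calls, not from the original code):

CASPApaintings(copyFile=True, convertXML=True, copyIm=True)  # full rebuild
CASPApaintings()  # later: only regenerate the CSV and image-set files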