def load_file_csv(path, file_name):
    """Merge every CSV found under ``path`` into one CSV and chart the result.

    Each path returned by ``extract_all_path(path, 'csv')`` is read with
    pandas and the frames are stacked row-wise in that order (original row
    indices are kept while stacking, then dropped on write).  The merged
    table is written to ``path + file_name + '.csv'`` and ``EDA_data_label``
    is called with that CSV path and ``path + file_name + '.png'``.

    Args:
        path: Directory prefix.  It is joined to ``file_name`` by plain
            string concatenation, so it must end with a path separator.
        file_name: Base name (without extension) of the merged CSV and of
            the PNG path handed to ``EDA_data_label``.

    Raises:
        IndexError: If ``extract_all_path`` returns no CSV paths.
    """
    path_list = extract_all_path(path, 'csv')
    if not path_list:
        raise IndexError('no CSV files found under {!r}'.format(path))

    # One concat instead of repeated DataFrame.append: append was removed in
    # pandas 2.0 and re-copied the accumulated frame on every iteration.
    data = pd.concat([pd.read_csv(path_file) for path_file in path_list])

    # NOTE(review): the merged file is written into the directory that was
    # just scanned; if extract_all_path lists files directly under `path`,
    # a second run would presumably pick the merged CSV up as input - confirm.
    merged_csv = path + file_name + '.csv'
    data.to_csv(merged_csv, index=False)
    EDA_data_label(merged_csv, path + file_name + '.png')
def _write_split(frame, directory, csv_name):
    """Save ``frame`` as ``directory + csv_name`` and chart it beside the CSV."""
    csv_path = directory + csv_name
    frame.to_csv(csv_path, index=False)
    # The PNG shares the CSV's base name (text before the first '.').
    EDA_data_label(csv_path, directory + csv_name.split('.')[0] + '.png')


def split_train_test_data(path,
                          output_dir='data/data_0930_merge_2/data_unmerge/',
                          train_fraction=0.8):
    """Randomly split every CSV under ``path`` into train and test files.

    For each path returned by ``extract_all_path(path, 'csv')`` the rows are
    partitioned by the value of their ``names_image`` column: each distinct
    image name is drawn into the first partition with probability
    ``train_fraction``, and all rows of an image stay together.  The
    partition with more rows is written to ``output_dir + 'train/'`` and the
    other one to ``output_dir + 'test/'``, both under the input file's base
    name, and ``EDA_data_label`` is called for each written CSV.

    Args:
        path: Location passed to ``extract_all_path`` to list the CSV files.
        output_dir: Prefix of the ``train/`` and ``test/`` output folders.
            Joined by string concatenation, so it must end with '/'.
        train_fraction: Probability with which an image name is drawn into
            the first partition.

    The split uses ``np.random`` and is therefore not reproducible unless the
    caller seeds NumPy beforehand.  The output folders are not created here.
    """
    path_list = extract_all_path(path, 'csv')
    for path_file in path_list:
        data = pd.read_csv(path_file)

        # Draw on unique image names.  Drawing on (names_image, list_label)
        # groups could place one image in both random groups, after which
        # its rows were excluded from both outputs.
        image_names = data['names_image'].drop_duplicates()
        picked = np.random.rand(len(image_names)) < train_fraction
        in_first = data['names_image'].isin(image_names[picked])
        first = data[in_first]
        second = data[~in_first]
        print(first.shape)
        print(second.shape)

        # The partition with more rows always becomes the training set.
        if len(first) > len(second):
            larger, smaller = first, second
        else:
            larger, smaller = second, first

        csv_name = path_file.split('/')[-1]
        _write_split(larger, output_dir + 'train/', csv_name)
        _write_split(smaller, output_dir + 'test/', csv_name)
for img in image_list: img = img.replace("unmerge", "merge_1") yield(folder + img + '.png') # data_words = list(sent_to_words(df_train['Review'])) # ten anh tuong ung def label(path_list, folder, path_file_csv, file_name): my_array2 = np.genfromtxt(path_list, dtype=str, skip_header=0) image_list = list(create_path_image(folder, my_array2)) df_info_image = pd.DataFrame() try: df_info_image = extract_info_image(image_list) df_info_image.to_csv(path_file_csv) EDA_data_label(path_file_csv, file_name) except Exception: print(Exception) return df_info_image path = 'data_0930_merge_1_tmp/Segmentation/' folder = 'data_0930_merge_1_tmp/masks/' path_file_text = extract_all_path(path, 'txt') for path_list in path_file_text: save_path = 'data/data_0930_merge_1/' + (str(path_list).split('/')[-1]).split('.')[0] path_file_csv = save_path + '.csv' file_name = save_path + '.png' df_info_inmage = label(path_list, folder, path_file_csv, file_name) print(df_info_inmage)
import os

import pandas as pd

from extract_information_image import extract_all_path

# Build a CSV that maps every PNG under crawl_data/pngimg.com1/ to a category
# taken from its file name: the part of the base name before the first '_',
# then before the first '.'.
path = []
path_file = extract_all_path('crawl_data/pngimg.com1/', 'png')
list_str = ['_', '.']

for pt in path_file:
    # Start from the base name on every iteration so a file name without any
    # separator yields its own name rather than the previous file's category.
    pa = pt.split('/')[-1]
    for x in list_str:
        pa = pa.split(x)[0]
    path.append(pa)

df = pd.DataFrame()
df['Path'] = path_file
df['Category'] = path
df.to_csv('crawl_data/path_file_pngimg.csv', index=False)