def _extract_images(self): """extract the downloaded images """ if self._verbose: _L("Extracting the images in " + _P(self._im_extracted_path)) os.system("tar xvfj {} -C {} {}".format( self._im_tar_path, self._path, ">/dev/null 2>&1" if self._verbose == False else " ", ))
def _resolve_dirs(self): """Resolve directories, delete old directories and create new ones """ if self._verbose: _L("Resolving directories") # Del existing directories os.system("rm -rf {} {}".format(self._im_test_path, self._im_train_path)) # Rename the intermediate folder to test_images os.system("mv {} {}".format(self._im_extracted_path, self._im_test_path)) os.makedirs(self._im_train_path, exist_ok=True)
def _process_images(self): # Process the downloaded dataset, split the images into test & train # directories, and remove the intermediate files. # # The dataset is split according to the list provided in the files in # dataset with names test.txt and train.txt. # # Seperate files according to train.txt and test.txt if self._verbose: _L("Seperating files from {} to {}".format(_P( self._im_test_path), _S(self._im_train_path))) with open(self._im_train_list_path) as training_images_list: for image in [line.rstrip("\n") for line in training_images_list]: # mv files from from_ to to_ from_ = os.path.join(self._im_test_path, image) + ".png" to_ = os.path.join(self._im_train_path, image) + ".png" os.system("mv {} {}".format(from_, to_)) if self._verbose: _L("{} moved to {}".format(_P(from_), _S(to_))) if self._verbose: _L("Extracted " + _P(len(os.listdir(self._im_test_path))) + " Images in " + _S(self._im_test_path)) _L("Extracted " + _P(len(os.listdir(self._im_train_path))) + " Images in " + _S(self._im_train_path))
def download_dataset(urls, path):
    """Download dataset files from the web with ``wget``.

    Args:
        urls (dict): mapping whose values are the URLs to download; the
            keys are not used here.
        path (str): directory the files are downloaded into. It is created
            if it does not exist yet.

    Raises:
        FileNotFoundError: if the ``wget`` executable is not installed.

    A non-zero ``wget`` exit status is not treated as an error, matching
    the previous best-effort behaviour.
    """
    import subprocess

    # Make sure the target directory exists.
    os.makedirs(os.path.normpath(path), exist_ok=True)

    # Download the dataset
    for url in urls.values():
        _L("Downloading " + _P(url) + " in " + _S(path))
        # Argument list instead of a shell string: characters such as
        # `&`, `?`, `;` or spaces in the URL/path are passed to wget
        # verbatim rather than being interpreted by a shell.
        subprocess.run(["wget", url, "-P", path], check=False)
def __init__(self, path="./data/daquar", force=False, verbose=False):
    """Construct a brand new DAQUAR data folder.

    Computes every path used by the dataset, exposes the final locations
    in ``self.paths`` and, when needed, runs the whole preparation
    pipeline (download, extract, resolve directories, split images,
    process questions).

    The pipeline runs when ``force`` is true or when neither the train
    nor the test image directory exists yet.

    Args:
        path (str, optional): folders path. Defaults to "./data/daquar".
        force (bool, optional): to force download. Defaults to False.
        verbose (bool, optional): detailed logs. Defaults to False.
    """
    self._path = os.path.abspath(path)
    self._force = force
    self._verbose = verbose
    self._urls = DAQUAR_URLS

    self._IM_DIR_TEST = "test_images"
    self._IM_DIR_TRAIN = "train_images"
    self._QA_JSON_TRAIN = "qa_train.json"

    # File names are the last path component of the configured URLs.
    im_archive_name = self._urls[DAQUAR_IM].split("/")[-1]
    im_train_list_name = self._urls[DAQUAR_IM_TRAIN].split("/")[-1]
    qa_train_txt_name = self._urls[DAQUAR_QA_TRAIN].split("/")[-1]

    ############### Paths for image directories and files ###############
    # images
    # The extraction folder is the archive name up to its first dot.
    self._im_extracted_path = os.path.join(
        self._path, im_archive_name.split(".")[0])
    self._im_test_path = os.path.join(self._path, self._IM_DIR_TEST)
    self._im_train_path = os.path.join(self._path, self._IM_DIR_TRAIN)
    self._im_tar_path = os.path.join(self._path, im_archive_name)
    self._im_train_list_path = os.path.join(self._path, im_train_list_name)

    # qa pairs
    self._qa_train_txt_path = os.path.join(self._path, qa_train_txt_name)
    self._qa_train_json_path = os.path.join(self._path,
                                            self._QA_JSON_TRAIN)

    # logging if verbose is set to true
    if self._verbose:
        _L("Images .tar path " + _P(self._im_tar_path))
        _L("Images extraction path " + _P(self._im_extracted_path))
        _L("Test images path " + _P(self._im_test_path))
        _L("Train images path " + _P(self._im_train_path))
        _L("Train pairs text " + _P(self._qa_train_txt_path))
        _L("Train processed json " + _P(self._qa_train_json_path))
        # Trailing space added so the label does not run into the path.
        _L("Images train list path " + _P(self._im_train_list_path))

    self.paths = {
        self._IM_DIR_TEST: self._im_test_path,
        self._IM_DIR_TRAIN: self._im_train_path,
        "qa_train": self._qa_train_json_path,
    }

    # Prepare the dataset only when forced or when no image directory
    # exists yet.
    images_present = (os.path.exists(self._im_train_path)
                      or os.path.exists(self._im_test_path))
    if force or not images_present:
        self._download()
        self._extract_images()
        self._resolve_dirs()
        self._process_images()
        self._process_questions()
def _download(self):
    """Download the dataset from the web, urls are predefined in the config.

    The visible body only emits a log line naming the dataset and the
    target folder, and only when verbose logging is enabled.

    NOTE(review): no actual download call is visible in this view of the
    file; the definition may continue beyond it -- confirm before relying
    on this method to fetch anything.
    """
    if self._verbose:
        _L("Downloading " + _P("DAQUAR") + " in " + _S(self._path))