def load_files_in_app(dataset):
    """Register new files from *dataset*'s location as DataSetFile rows.

    Lists the files at ``dataset.location`` matching the
    ``dataset.associated_file`` pattern (via ``load_files``) and creates a
    ``DataSetFile`` for each one not already recorded for this dataset.

    Args:
        dataset: a DataSet model instance providing ``location`` and
            ``associated_file``.
    """
    logger.info("start loading files in application")
    location = dataset.location
    file_name_pattern = dataset.associated_file
    files = load_files(location, file_name_pattern)
    # For the corresponding dataset, register only the files that are not
    # yet in the database (new files).
    dataset_files = DataSetFile.objects.filter(data_set=dataset)
    # Set comprehension (instead of set([...])) gives O(1) membership tests
    # while scanning the file list.
    known_names = {f.name for f in dataset_files}
    for name in files:
        if name in known_names:
            continue
        DataSetFile(name=name, data_set=dataset).save()
    logger.info("files loaded")
def load_files_into_dataset(dataset_name):
    """Scan a dataset's folder and register any not-yet-loaded files.

    Looks up the DataSet by *dataset_name*, lists the regular files in its
    ``location`` (optionally filtered by the ``associated_file`` regex) and
    creates a ``DataSetFile`` row for each file not already associated with
    this dataset.

    Args:
        dataset_name: name of an existing DataSet.

    Raises:
        ValueError: if no DataSet with that name exists.
    """
    data_set = DataSet.objects.filter(name=dataset_name).first()
    if data_set is None:
        # .first() returns None on no match; fail with a clear message
        # instead of an AttributeError on data_set.location below.
        raise ValueError("unknown dataset: " + dataset_name)
    folder = data_set.location
    if data_set.associated_file is not None:
        # Keep only files whose name matches the configured pattern.
        compiled_pattern = re.compile(data_set.associated_file)
        files_in_location = [
            f
            for f in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, f))
            and compiled_pattern.search(f) is not None
        ]
    else:
        files_in_location = [
            f for f in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, f))
        ]
    for file_name in files_in_location:
        # BUG FIX: the "already loaded" check must be scoped to THIS dataset —
        # filtering by name alone would skip a file that happens to exist in a
        # different dataset. .exists() also covers the duplicate case that
        # `count() == 1` missed, and avoids counting rows.
        already_loaded = DataSetFile.objects.filter(
            name=file_name, data_set=data_set
        ).exists()
        if already_loaded:
            # Lazy %-args: formatting only happens if the record is emitted.
            logger.warning(
                "Warning: %s already loaded in dataset: %s",
                file_name,
                data_set.name,
            )
            continue
        DataSetFile(name=file_name, data_set=data_set).save()