Ejemplo n.º 1
0
def load_files_in_app(dataset):
    """Register on-disk files of *dataset* that are not yet in the database.

    Lists the files under ``dataset.location`` matching the
    ``dataset.associated_file`` pattern via ``load_files`` and creates a
    ``DataSetFile`` row for every file name not already recorded for this
    dataset.

    :param dataset: a ``DataSet`` model instance providing ``location`` and
        ``associated_file``.
    """
    logger.info("start loading files in application")
    location = dataset.location
    file_name_pattern = dataset.associated_file
    files = load_files(location, file_name_pattern)
    # For the corresponding dataset, register only the files that were not
    # already loaded in the database (new files).
    dataset_files = DataSetFile.objects.filter(data_set=dataset)
    # Set comprehension (instead of set([...])) gives O(1) membership tests.
    file_name_set = {f.name for f in dataset_files}

    for f in files:
        if f in file_name_set:
            continue
        DataSetFile(name=f, data_set=dataset).save()
    logger.info("files loaded")
Ejemplo n.º 2
0
def load_files_into_dataset(dataset_name):
    """Scan a dataset's folder and register files not yet in the database.

    Looks up the ``DataSet`` named *dataset_name*, lists the regular files in
    its ``location`` (filtered by the ``associated_file`` regex when one is
    set), and creates a ``DataSetFile`` row for every file not already
    recorded for this dataset.

    :param dataset_name: name of an existing ``DataSet``; if no such dataset
        exists a warning is logged and nothing is loaded.
    """
    data_set = DataSet.objects.filter(name=dataset_name).first()
    if data_set is None:
        # .first() returns None when no row matches; the original code would
        # crash with AttributeError on data_set.location.
        logger.warning("dataset not found: %s", dataset_name)
        return
    folder = data_set.location

    if data_set.associated_file is not None:
        # Filter the directory listing by the dataset's file-name regex,
        # compiled once outside the listing loop.
        compiled_pattern = re.compile(data_set.associated_file)
        files_in_location = [
            f
            for f in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, f)) and compiled_pattern.search(f) is not None
        ]
    else:
        files_in_location = [f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]

    for file_name in files_in_location:
        # Skip files already registered for THIS dataset.  The original query
        # matched on name alone, so a same-named file belonging to another
        # dataset blocked loading, and `count() == 1` missed pre-existing
        # duplicates (count >= 2).  exists() is also cheaper than count().
        if DataSetFile.objects.filter(name=file_name, data_set=data_set).exists():
            # Lazy %-style args avoid building the message unless emitted.
            logger.warning("Warning: %s already loaded in dataset: %s", file_name, data_set.name)
            continue
        DataSetFile(name=file_name, data_set=data_set).save()