Example #1
0
def get_grondexploitatie_files():
    """
    Download the grondexploitatie files.

    Lists the 'grondexploitatie' container, matches each object against the
    expected file names, validates the object's age and hands all matches to
    ``download_files``.

    Raises:
        ValueError: when a matched file is older than its allowed maximum
            age (a ``max_age`` of 0 or less disables the check for that file).
    """
    file_list = []

    meta_data = objectstore.get_full_container_list(grondexploitatie_conn,
                                                    'grondexploitatie')

    # Single reference time for every age check; recomputing it per object
    # is wasteful and could make checks inconsistent across the listing.
    now = datetime.datetime.now()

    for o_info in meta_data:
        # EXPECTED_FILES_AND_MAX_AGE holds (filename_suffix, max_age) pairs;
        # unpack directly instead of indexing.
        for expected_file, max_age in EXPECTED_FILES_AND_MAX_AGE:
            if not o_info['name'].endswith(expected_file):
                continue

            dt = parser.parse(o_info['last_modified'])
            delta = now - dt

            log.debug('AGE: %d %s', delta.days, expected_file)

            # max_age <= 0 means "no age limit" for this file.
            if max_age > 0 and delta.days > max_age:
                log.error('DELIVERY IMPORTED FILES ARE TOO OLD!')
                raise ValueError(
                    f'{expected_file} is {delta.days} days old '
                    f'(max {max_age})')

            log.debug('%s %s', expected_file, dt)
            file_list.append((dt, o_info))

    download_files(file_list)
def download_container(conn, container, targetdir):
    """
    Download every regular object in *container* into *targetdir*.

    Directory placeholder objects, partial-upload ("part") segments and
    files that already exist locally are skipped.

    :param conn: objectstore/swift connection
    :param container: container mapping with at least a 'name' key
    :param targetdir: local directory to write files into
    """
    # list of container's content
    content = objectstore.get_full_container_list(conn, container['name'])

    # loop over files
    for obj in content:
        # Skip directory placeholder objects.
        if obj['content_type'] == 'application/directory':
            logger.debug('skipping dir')
            continue

        # Skip partial upload segments.
        if 'part' in obj['name']:
            logger.debug('skipping part')
            continue

        # target filename of object
        target_filename = os.path.join(targetdir, obj['name'])

        if file_exists(target_filename):
            logger.debug('skipping %s, file already exists', target_filename)
            continue

        # BUG FIX: fetch first, then write. Opening the file before the
        # download meant a failed get_object left an empty file behind,
        # which the file_exists check would then skip on the next run.
        _, obj_content = conn.get_object(container['name'], obj['name'])

        # Object names may contain path separators; create intermediate
        # directories so open() does not fail on nested names.
        os.makedirs(os.path.dirname(target_filename) or '.', exist_ok=True)

        with open(target_filename, 'wb') as new_file:
            new_file.write(obj_content)
Example #3
0
def get_latest_hr_files():
    """
    Download the expected files provided by mks / kpn.

    Matches objects in the 'handelsregister' container against
    EXPECTED_FILES, rejects deliveries older than MAX_AGE_DAYS and hands
    all matches to ``download_files``.

    Raises:
        ValueError: when a matched file is older than MAX_AGE_DAYS days.
    """
    # Maximum accepted delivery age; was a bare magic number (10) before.
    MAX_AGE_DAYS = 10

    file_list = []

    meta_data = objectstore.get_full_container_list(handelsregister_conn,
                                                    'handelsregister')

    # Single reference time for every age check.
    now = datetime.datetime.now()

    for o_info in meta_data:
        for expected_file in EXPECTED_FILES:
            if not o_info['name'].endswith(expected_file):
                continue

            dt = parser.parse(o_info['last_modified'])
            delta = now - dt

            log.debug('AGE: %d %s', delta.days, expected_file)

            if delta.days > MAX_AGE_DAYS:
                # BUG FIX: corrected 'DELEVERY' typo in the error message.
                log.error('DELIVERY IMPORTED FILES ARE TOO OLD!')
                raise ValueError(
                    f'{expected_file} is {delta.days} days old '
                    f'(max {MAX_AGE_DAYS})')

            log.debug('%s %s', expected_file, dt)
            file_list.append((dt, o_info))

    download_files(file_list)
Example #4
0
def download_database(
    connection,
    container: str,
):
    """
    Download the database dump for the current environment.

    Looks for an object ending in ``database.<ENV>.dump`` in *container*
    (prefix 'database') and saves it to ``data/``.

    :param connection: objectstore connection
    :param container: container name to search
    """
    meta_data = objectstore.get_full_container_list(connection,
                                                    container,
                                                    prefix='database')

    # Loop-invariant: the expected name depends only on ENV.
    expected_file = f'database.{ENV}.dump'

    for o_info in meta_data:
        # BUG FIX: the write below used to sit outside this match branch
        # (but inside the loop), so any non-matching object either raised
        # NameError (new_data undefined) or rewrote the dump with stale
        # data. Everything now happens only for matching objects.
        if not o_info['name'].endswith(expected_file):
            continue

        dt = dateparser.parse(o_info['last_modified'])
        now = datetime.datetime.now()

        delta = now - dt

        log.debug('AGE: %d %s', delta.days, expected_file)

        log.debug('Downloading: %s', (expected_file))

        new_data = objectstore.get_object(connection, o_info, container)

        # save output to file!
        with open('data/{}'.format(expected_file), 'wb') as outputzip:
            outputzip.write(new_data)
def download_container(conn, container, datadir):
    """
    Download every object of *container* into ``datadir/<container name>``.

    :param conn: objectstore connection
    :param container: container mapping with at least a 'name' key
    :param datadir: local base directory for the downloads
    """
    container_name = container['name']
    logger.debug('Downloading dataset: %s', container_name)

    destination_dir = os.path.join(datadir, container_name)
    os.makedirs(destination_dir, exist_ok=True)

    for entry in objectstore.get_full_container_list(conn, container_name):
        destination = os.path.join(destination_dir, entry['name'])
        with open(destination, 'wb') as out:
            _, payload = conn.get_object(container_name, entry['name'])
            out.write(payload)
Example #6
0
 def get_wba_documents_list(self, connection) -> DocumentList:
     """
     Get list of wba list documents from object store.
     :param connection: swiftclient connection
     :return: Array of documents in the form:
     [('rapportage', 'QE1_rapportage_Some_where - some extra info.pdf'), ... ]
     """
     listing = get_full_container_list(connection, DOC_CONTAINER_NAME)
     # Keep only real files: directory placeholder entries are filtered out.
     paths = (
         entry.get('name')
         for entry in listing
         if entry.get('content_type') != DIR_CONTENT_TYPE
     )
     # Split each path into a (directory, filename) tuple.
     return [os.path.split(path) for path in paths]
def get_all_files():
    """
    Mirror the bouwdossiers container into ``settings.DATA_DIR``.

    A local file whose mtime equals the object's last-modified timestamp is
    treated as cached and not downloaded again; after each download the
    local mtime is set to that timestamp to make the cache check work.
    """
    connection = get_objectstore_connection()
    container_name = settings.BOUWDOSSIERS_OBJECTSTORE_CONTAINER
    os.makedirs(settings.DATA_DIR, exist_ok=True)

    for meta in get_full_container_list(connection, container_name):
        # Skip directory placeholder entries.
        if meta.get('content_type') == DIR_CONTENT_TYPE:
            continue

        name = meta.get('name')
        parsed = time.strptime(meta.get('last_modified'),
                               "%Y-%m-%dT%H:%M:%S.%f")
        epoch_dt = timegm(parsed)

        output_path = os.path.join(settings.DATA_DIR, name)
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # Cached only when the file exists AND its mtime matches the
        # object's last-modified timestamp exactly.
        is_cached = (os.path.isfile(output_path)
                     and epoch_dt == os.path.getmtime(output_path))
        if is_cached:
            log.info(f"Using cached file: {output_path}")
        else:
            log.info(f"Fetching file: {output_path}")
            new_data = connection.get_object(container_name, name)[1]
            with open(output_path, 'wb') as file:
                file.write(new_data)
            # Stamp the download so the next run sees it as cached.
            os.utime(output_path, (epoch_dt, epoch_dt))
Example #8
0
def download_container(conn, container, datadir):
    """
    Download all objects of *container* into ``datadir/<container name>``,
    skipping files that were already downloaded.
    """
    container_name = container['name']
    log.debug('Downloading dataset: %s', container_name)

    target_dir = os.path.join(datadir, container_name)
    os.makedirs(target_dir, exist_ok=True)

    for item in objectstore.get_full_container_list(conn, container_name):
        destination = os.path.join(target_dir, item['name'])

        # Already downloaded
        if file_exists(destination):
            log.debug('Already downloaded %s', destination)
            continue

        # TODO age check!

        with open(destination, 'wb') as out:
            _, payload = conn.get_object(container_name, item['name'])
            out.write(payload)
Example #9
0
# Silence the noisy HTTP/object-store client loggers; only warnings and
# above from these libraries are of interest here.
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("swiftclient").setLevel(logging.WARNING)

# Module-level objectstore connection used by the upload script below.
_conn = objectstore.get_connection(OBJECTSTORE)


def _files_in_source_dir(dir):
    """Return all regular files under *dir*, searched recursively."""
    # List comprehension instead of filter(lambda ...): same result,
    # clearer and idiomatic. Directories matched by the glob are dropped.
    return [f for f in glob.glob(f'{dir}/**', recursive=True)
            if os.path.isfile(f)]


if __name__ == "__main__":
    file_list = objectstore.get_full_container_list(_conn,
                                                    SCHEMAS_CONTAINER,
                                                    prefix=ENVIRONMENT + "/")
    last_modified = max([item['last_modified'] for item in file_list])

    source_dir = os.path.dirname(os.path.realpath(__file__)) + "/target"
    files = _files_in_source_dir(source_dir)
    for file in files:
        target_name = file.replace(source_dir, ENVIRONMENT)
        with open(file, 'rb') as fileobject:
            objectstore.put_object(_conn,
                                   SCHEMAS_CONTAINER,
                                   target_name,
                                   fileobject,
                                   content_type='application/json')

    file_list = objectstore.get_full_container_list(_conn,