def get_grondexploitatie_files():
    """
    Download the grondexploitatie files
    """
    file_list = []

    meta_data = objectstore.get_full_container_list(
        grondexploitatie_conn, 'grondexploitatie')

    for o_info in meta_data:
        for expected_file, max_age in EXPECTED_FILES_AND_MAX_AGE:
            if not o_info['name'].endswith(expected_file):
                continue

            dt = parser.parse(o_info['last_modified'])
            now = datetime.datetime.now()
            delta = now - dt

            log.debug('AGE: %d %s', delta.days, expected_file)

            if max_age > 0 and delta.days > max_age:
                log.error('DELIVERY IMPORTED FILES ARE TOO OLD!')
                raise ValueError

            log.debug('%s %s', expected_file, dt)
            file_list.append((dt, o_info))

    download_files(file_list)
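# The contents of EXPECTED_FILES_AND_MAX_AGE and the download_files helper are
# not shown above. The following is a minimal sketch of how they could look,
# assuming download_files fetches each object with objectstore.get_object (as
# the database snippet below does) and writes it under data/. The file names
# and the data/ path are illustrative assumptions, not the project's actual
# configuration.
EXPECTED_FILES_AND_MAX_AGE = [
    ('grex_projecten.csv', 30),   # assumed file name, max age in days
    ('grex_stadsdelen.csv', 0),   # 0 = no age check
]


def download_files(file_list):
    """Write every (last_modified, object_info) pair in file_list to data/."""
    for _dt, o_info in file_list:
        target = os.path.join('data', os.path.basename(o_info['name']))
        log.debug('Downloading %s', target)
        new_data = objectstore.get_object(
            grondexploitatie_conn, o_info, 'grondexploitatie')
        with open(target, 'wb') as outfile:
            outfile.write(new_data)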
def download_container(conn, container, targetdir):
    # list the container's contents
    content = objectstore.get_full_container_list(conn, container['name'])

    # loop over the objects
    for obj in content:
        # skip directory placeholders and "part" files
        if obj['content_type'] == 'application/directory':
            logger.debug('skipping dir')
            continue
        if 'part' in obj['name']:
            logger.debug('skipping part')
            continue

        # target filename of the object
        target_filename = os.path.join(targetdir, obj['name'])
        if file_exists(target_filename):
            logger.debug('skipping %s, file already exists', target_filename)
            continue

        # write the object to the target file
        with open(target_filename, 'wb') as new_file:
            _, obj_content = conn.get_object(container['name'], obj['name'])
            new_file.write(obj_content)
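# A minimal usage sketch, assuming conn is a swiftclient Connection and that
# every container in the account should be mirrored locally. The helper name
# download_all_containers is an assumption; only get_account() is standard
# swiftclient API (it returns the response headers plus a list of container
# dicts with a 'name' key).
def download_all_containers(conn, targetdir):
    _, containers = conn.get_account()
    for container in containers:
        download_container(conn, container, targetdir)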
def get_latest_hr_files():
    """
    Download the expected files provided by mks / kpn
    """
    file_list = []

    meta_data = objectstore.get_full_container_list(
        handelsregister_conn, 'handelsregister')

    for o_info in meta_data:
        for expected_file in EXPECTED_FILES:
            if not o_info['name'].endswith(expected_file):
                continue

            dt = parser.parse(o_info['last_modified'])
            now = datetime.datetime.now()
            delta = now - dt

            log.debug('AGE: %d %s', delta.days, expected_file)

            if delta.days > 10:
                log.error('DELIVERY IMPORTED FILES ARE TOO OLD!')
                raise ValueError

            log.debug('%s %s', expected_file, dt)
            file_list.append((dt, o_info))

    download_files(file_list)
def download_database(connection, container: str):
    """
    Download database dump
    """
    meta_data = objectstore.get_full_container_list(
        connection, container, prefix='database')

    expected_file = f'database.{ENV}.dump'

    for o_info in meta_data:
        if not o_info['name'].endswith(expected_file):
            continue

        dt = dateparser.parse(o_info['last_modified'])
        now = datetime.datetime.now()
        delta = now - dt

        log.debug('AGE: %d %s', delta.days, expected_file)
        log.debug('Downloading: %s', expected_file)

        new_data = objectstore.get_object(connection, o_info, container)

        # save output to file!
        with open(f'data/{expected_file}', 'wb') as outputzip:
            outputzip.write(new_data)
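# A minimal usage sketch. It assumes the connection is built the same way as
# in the schema-upload script further down (objectstore.get_connection with an
# OBJECTSTORE config dict); the container name 'my-container' is a placeholder
# assumption, and data/ must exist because download_database writes into it.
if __name__ == '__main__':
    connection = objectstore.get_connection(OBJECTSTORE)
    os.makedirs('data', exist_ok=True)
    download_database(connection, 'my-container')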
def download_container(conn, container, datadir):
    logger.debug('Downloading dataset: %s', container['name'])

    target_dir = os.path.join(datadir, container['name'])
    os.makedirs(target_dir, exist_ok=True)

    content = objectstore.get_full_container_list(conn, container['name'])

    for obj in content:
        target_filename = os.path.join(target_dir, obj['name'])
        with open(target_filename, 'wb') as new_file:
            _, obj_content = conn.get_object(container['name'], obj['name'])
            new_file.write(obj_content)
def get_wba_documents_list(self, connection) -> DocumentList:
    """
    Get list of wba list documents from object store.

    :param connection: swiftclient connection
    :return: list of documents in the form:
        [('rapportage', 'QE1_rapportage_Some_where - some extra info.pdf'), ...]
    """
    documents_meta = get_full_container_list(connection, DOC_CONTAINER_NAME)
    documents_paths = [
        meta.get('name') for meta in documents_meta
        if meta.get('content_type') != DIR_CONTENT_TYPE
    ]
    return list(map(os.path.split, documents_paths))
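# A minimal usage sketch, assuming object names are stored as
# '<category>/<filename>' so os.path.split yields (category, filename) pairs,
# and that this method lives on the same class as get_wba_documents_list. The
# 'rapportage' category comes from the docstring example; the method name
# below is an assumption.
def get_rapportage_documents(self, connection) -> list:
    documents = self.get_wba_documents_list(connection)
    return [filename for category, filename in documents
            if category == 'rapportage']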
def get_all_files():
    connection = get_objectstore_connection()
    container_name = settings.BOUWDOSSIERS_OBJECTSTORE_CONTAINER
    os.makedirs(settings.DATA_DIR, exist_ok=True)

    documents_meta = get_full_container_list(connection, container_name)

    for meta in documents_meta:
        if meta.get('content_type') != DIR_CONTENT_TYPE:
            name = meta.get('name')
            last_modified = meta.get('last_modified')
            dt = time.strptime(last_modified, "%Y-%m-%dT%H:%M:%S.%f")
            epoch_dt = timegm(dt)

            output_path = os.path.join(settings.DATA_DIR, name)
            dirname = os.path.dirname(output_path)
            os.makedirs(dirname, exist_ok=True)

            if os.path.isfile(output_path) and epoch_dt == os.path.getmtime(output_path):
                log.info(f"Using cached file: {output_path}")
            else:
                log.info(f"Fetching file: {output_path}")
                new_data = connection.get_object(container_name, name)[1]
                with open(output_path, 'wb') as file:
                    file.write(new_data)
                os.utime(output_path, (epoch_dt, epoch_dt))
def download_container(conn, container, datadir):
    log.debug('Downloading dataset: %s', container['name'])

    target_dir = os.path.join(datadir, container['name'])
    os.makedirs(target_dir, exist_ok=True)

    content = objectstore.get_full_container_list(conn, container['name'])

    for obj_meta in content:
        target_filename = os.path.join(target_dir, obj_meta['name'])
        if file_exists(target_filename):
            # Already downloaded
            log.debug('Already downloaded %s', target_filename)
            continue

        # TODO age check!
        with open(target_filename, 'wb') as new_file:
            _, obj_content = conn.get_object(container['name'], obj_meta['name'])
            new_file.write(obj_content)
logging.getLogger("requests").setLevel(logging.WARNING) logging.getLogger("urllib3").setLevel(logging.WARNING) logging.getLogger("swiftclient").setLevel(logging.WARNING) _conn = objectstore.get_connection(OBJECTSTORE) def _files_in_source_dir(dir): return list( filter(lambda f: os.path.isfile(f), glob.glob(f'{dir}/**', recursive=True))) if __name__ == "__main__": file_list = objectstore.get_full_container_list(_conn, SCHEMAS_CONTAINER, prefix=ENVIRONMENT + "/") last_modified = max([item['last_modified'] for item in file_list]) source_dir = os.path.dirname(os.path.realpath(__file__)) + "/target" files = _files_in_source_dir(source_dir) for file in files: target_name = file.replace(source_dir, ENVIRONMENT) with open(file, 'rb') as fileobject: objectstore.put_object(_conn, SCHEMAS_CONTAINER, target_name, fileobject, content_type='application/json') file_list = objectstore.get_full_container_list(_conn,