def __init__(self, executor_uri, cpu_limit, mem_limit, push_registry,
             staging_uri, stream=False, verbose=False, repository=None,
             pull_registry=None, docker_host=None, container_image=None,
             insecure_registries=False, max_retries=3):
    self.executor_uri = executor_uri
    self.cpu = float(cpu_limit)
    self.mem = int(mem_limit)
    self.push_registry = push_registry
    self.pull_registry = pull_registry
    self.staging_uri = staging_uri
    self.stream = stream
    self.verbose = verbose
    self.repository = repository
    self.docker_host = docker_host
    self.container_image = container_image
    self.insecure_registries = insecure_registries
    self.max_retries = max_retries

    self.pending = 0
    self.running = 0
    self.finished = 0
    self.failed = 0

    self.queued_tasks = []
    self.task_status = defaultdict(lambda: None)
    self.task_history = {}
    self.task_retries = defaultdict(int)
    self.blacklist = set()

    self._processing_offers = threading.Lock()
    self._processing_queue = threading.Lock()

    # Ensure the staging directory exists
    self.filesystem = None
    if self.staging_uri:
        staging_uri = urlparse(self.staging_uri)
        staging_fs = opener.opendir(
            "%s://%s/" % (staging_uri.scheme, staging_uri.netloc))
        staging_fs.makedir(staging_uri.path.lstrip("/"),
                           recursive=True, allow_recreate=True)
        self.filesystem = opener.opendir(self.staging_uri)

        self.cleanup = TaskCleanupThread(self.filesystem)
        self.cleanup.start()

def put(self):
    filename = parse_options_header(
        request.headers.get('Content-Disposition', ''))[1].get('filename')
    d = Document.create({'deleted': False})
    opener.opendir(directory_name(d), create_dir=True)
    d.setcontents(
        request.stream,
        name=lambda s: os.path.join(directory_name(s),
                                    secure_filename(filename))
    )
    return d.dumps()

def __init__(self, tasks, executor_uri, cpu_limit, mem_limit, push_registry,
             staging_uri, stream=False, verbose=False, repository=None,
             pull_registry=None, docker_host=None, container_image=None,
             insecure_registries=False):
    self.executor_uri = executor_uri
    self.cpu = float(cpu_limit)
    self.mem = int(mem_limit)
    self.push_registry = push_registry
    self.pull_registry = pull_registry
    self.staging_uri = staging_uri
    self.stream = stream
    self.verbose = verbose
    self.repository = repository
    self.docker_host = docker_host
    self.container_image = container_image
    self.insecure_registries = insecure_registries

    self.queued_tasks = []
    for path, tags in tasks:
        dockerfile = parse_dockerfile(path, registry=pull_registry)
        self.queued_tasks.append((path, dockerfile, tags))

    self.pending = len(self.queued_tasks)
    self.running = 0
    self.finished = 0
    self.failed = 0
    self.task_ids = {}

    self.processing_offers = threading.Lock()

    # Ensure the staging directory exists
    self.filesystem = None
    if self.staging_uri:
        staging_uri = urlparse(self.staging_uri)
        staging_fs = opener.opendir(
            "%s://%s/" % (staging_uri.scheme, staging_uri.netloc))
        staging_fs.makedir(staging_uri.path.lstrip("/"),
                           recursive=True, allow_recreate=True)
        self.filesystem = opener.opendir(self.staging_uri)

def put(self, oauth):
    filename = parse_options_header(
        request.headers.get('Content-Disposition', ''))[1].get('filename')
    d = Document.create({'deleted': False})
    opener.opendir(directory_name(d), create_dir=True)
    d.setcontents(
        request.stream,
        name=lambda s: os.path.join(directory_name(s),
                                    secure_filename(filename))
    )
    return d.dumps()

def __init__(self, tasks, executor_uri, cpu_limit, mem_limit, push_registry,
             staging_uri, stream=False, verbose=False, repository=None,
             pull_registry=None, docker_host=None, container_image=None,
             insecure_registries=False):
    self.executor_uri = executor_uri
    self.cpu = float(cpu_limit)
    self.mem = int(mem_limit)
    self.push_registry = push_registry
    self.pull_registry = pull_registry
    self.staging_uri = staging_uri
    self.stream = stream
    self.verbose = verbose
    self.repository = repository
    self.docker_host = docker_host
    self.container_image = container_image
    self.insecure_registries = insecure_registries

    self.queued_tasks = []
    for path, tags in tasks:
        dockerfile = parse_dockerfile(path, registry=pull_registry)
        self.queued_tasks.append((path, dockerfile, tags))

    self.pending = len(self.queued_tasks)
    self.running = 0
    self.finished = 0
    self.failed = 0
    self.task_ids = {}

    self._processing_offers = threading.Lock()

    # Ensure the staging directory exists
    self.filesystem = None
    if self.staging_uri:
        staging_uri = urlparse(self.staging_uri)
        staging_fs = opener.opendir(
            "%s://%s/" % (staging_uri.scheme, staging_uri.netloc))
        staging_fs.makedir(staging_uri.path.lstrip("/"),
                           recursive=True, allow_recreate=True)
        self.filesystem = opener.opendir(self.staging_uri)

        self.cleanup = TaskCleanupThread(self.filesystem)
        self.cleanup.start()

def _get_fs(self, create_dir=True): """.""" filedir = dirname(self.fileurl) filename = basename(self.fileurl) return (opener.opendir(filedir, writeable=True, create_dir=create_dir), filename)
def test_verify_remote_scroll(self):
    pyfs = opener.opendir(settings.config['fs_path'])
    signing_key = crypto.generate_signing_key()
    scroll_id = sha1_hexdigest
    s = scroll.RemoteScroll(pyfs, scroll_id,
                            signing_key.verify_key.encode(crypto.HexEncoder))
    s.get_verify_key = lambda y: signing_key.verify_key
    items = []
    for i in range(10):
        item_hash = HASH(i)
        s.add(item_hash)
        items.append(item_hash)
    four, five = s.slice(4, 2)
    self.assertEqual(four, HASH(4))
    self.assertEqual(five, HASH(5))
    new_item_hash = HASH(i)
    new_state = HASH(s.state + new_item_hash)
    update = new_item_hash + new_state
    signed_update = signing_key.sign(update)
    self.assertEqual(s.verify_update(signed_update), update)

def test_put_get_pool(self): my_data = "B"*20 scroll_id = sha1_hexdigest pyfs = opener.opendir(settings.config['fs_path']) p = pool.Pool(pyfs) hash_bytes = p.put(my_data) self.assertEqual(p.get(hash_bytes), my_data)
def archive_fs(locations): """Fixture to check the BagIt file generation.""" archive_path = locations['archive'].uri fs = opener.opendir(archive_path, writeable=False, create_dir=True) yield fs for d in fs.listdir(): fs.removedir(d, force=True)
def test_put_get_pool(self): my_data = "B" * 20 scroll_id = sha1_hexdigest pyfs = opener.opendir(settings.config['fs_path']) p = pool.Pool(pyfs) hash_bytes = p.put(my_data) self.assertEqual(p.get(hash_bytes), my_data)
def _get_fs(self, create_dir=True): """Return tuple with filesystem and filename.""" filedir = dirname(self.fileurl) filename = basename(self.fileurl) return (opener.opendir(filedir, writeable=True, create_dir=create_dir), filename)
def test_verify_remote_scroll(self):
    pyfs = opener.opendir(settings.config['fs_path'])
    signing_key = crypto.generate_signing_key()
    scroll_id = sha1_hexdigest
    s = scroll.RemoteScroll(
        pyfs, scroll_id, signing_key.verify_key.encode(crypto.HexEncoder))
    s.get_verify_key = lambda y: signing_key.verify_key
    items = []
    for i in range(10):
        item_hash = bytes_hash_int(i)
        s.add(item_hash)
        items.append(item_hash)
    four, five = s.slice(4, 2)
    self.assertEqual(four, bytes_hash_int(4))
    self.assertEqual(five, bytes_hash_int(5))
    new_item_hash = bytes_hash_int(i)
    new_state = settings.HASH(s.state + new_item_hash)
    update = new_item_hash + new_state
    signed_update = signing_key.sign(update)
    self.assertEqual(s.verify_update(signed_update), update)

def test_add_one(self):
    scroll_id = 'test_add'
    data1 = HASH('1')
    pyfs = opener.opendir(settings.config['fs_path'])
    s = scroll.Scroll(pyfs, scroll_id)
    s.add(data1)
    contents = pyfs.getcontents(scroll_id + '.scroll')
    self.assertEqual(contents, data1)

def test_save_scroll(self):
    item_hash = twenty_bytes
    scroll_id = sha1_hexdigest
    pyfs = opener.opendir(settings.config['fs_path'])
    s = scroll.Scroll(pyfs, scroll_id)
    s.add(item_hash)
    self.assertTrue(s.has(item_hash))
    contents = pyfs.getcontents(scroll_id + '.scroll')
    self.assertEqual(contents, item_hash)

def _get_fs(self, create_dir=True): """.""" filedir = dirname(self.fileurl) filename = basename(self.fileurl) return ( opener.opendir(filedir, writeable=True, create_dir=create_dir), filename )
def _get_fs(self, create_dir=True): """Return tuple with filesystem and filename.""" filedir = dirname(self.fileurl) filename = basename(self.fileurl) return ( opener.opendir(filedir, writeable=True, create_dir=create_dir), filename )
def test_sign(self):
    my_message = 'Do it Like La La La!'
    pyfs = opener.opendir(settings.config['fs_path'])
    fingerprint = notary.Notary.generate(pyfs)
    publisher = None
    n = notary.Notary(publisher, pyfs, fingerprint)
    signed_message = n.signing_key.sign(my_message)
    verify_key = n.signing_key.verify_key
    self.assertEqual(verify_key.verify(signed_message), my_message)

def _process_files(self, metadata):
    """Transfer files in a list from one ``fs`` object to another.

    All transferred files keep the same filename.
    """
    # FS object created at the folder where the record-specific bag is
    fs_dest = opener.opendir(self.folder, "files", create_dir=True)
    files_to_upload = []
    for file_to_upload in metadata.get("files", []):
        dirname_ = os.path.dirname(file_to_upload["path"])
        basename_ = os.path.basename(file_to_upload["path"])
        fs_src = opener.opendir(dirname_)
        copyfile(fs_src, basename_, fs_dest, basename_)
        file_to_upload["path"] = os.path.join("files", basename_)
        del file_to_upload["url"]
        files_to_upload.append(file_to_upload)
    metadata["files_to_upload"] = files_to_upload
    return metadata

def test_load_scroll(self):
    item_hash = twenty_bytes
    scroll_id = sha1_hexdigest
    pyfs = opener.opendir(settings.config['fs_path'])
    with open(scroll_id + '.scroll', 'w+') as f:
        f.write(item_hash)
    s = scroll.Scroll(pyfs, scroll_id)
    s.load()
    self.assertTrue(s.has(item_hash))

def save(self, incoming_stream, size=None, chunk_size=None):
    """Save file in the file system."""
    uri = self.obj.bucket.location.uri
    path = self.make_path()

    with opener.opendir(uri) as fs:
        dest_file = fs.makeopendir(path, recursive=True).open('data', 'wb')
        bytes_written, checksum = self._save_stream(
            incoming_stream, dest_file, chunk_size=chunk_size)
        dest_file.close()

    return join(uri, path, 'data'), bytes_written, checksum

def transfer_data_files(self):
    """Transfer files in a list from one ``fs`` object to another.

    All transferred files keep the same filename.
    """
    # FS object created at the folder where the record-specific bag is
    fs_dest = opener.opendir(self.bagit_folder_path, create_dir=True)
    try:
        for docfile in self.docfiles:
            fs_src = opener.opendir(os.path.dirname(docfile.get_path()))
            copyfile(fs_src, os.path.basename(docfile.get_path()),
                     fs_dest, os.path.basename(docfile.get_path()))
    except Exception:
        raise ArchiverError(
            "Error while copying %s to %s" %
            (docfile.get_path(),
             os.path.join(self.bagit_folder_path,
                          docfile.name + docfile.format)))
    return True

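# Hedged standalone sketch of the copy pattern used in the two transfer
# helpers above: two directories are opened as pyfilesystem FS objects and a
# single file is copied between them with fs.utils.copyfile (pyfilesystem
# 0.x). The paths and filename below are hypothetical, not taken from the
# snippets above.
from fs.opener import opener
from fs.utils import copyfile

src_fs = opener.opendir('/data/docs')                    # hypothetical source dir
dst_fs = opener.opendir('/data/bag', create_dir=True)    # hypothetical destination dir
copyfile(src_fs, 'report.pdf', dst_fs, 'report.pdf')     # same filename on both sides
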
def test_slice(self):
    pyfs = opener.opendir(settings.config['fs_path'])
    scroll_id = sha1_hexdigest
    s = scroll.Scroll(pyfs, scroll_id)
    items = []
    for i in range(10):
        item_hash = HASH(i)
        s.add(item_hash)
        items.append(item_hash)
    four, five = s.slice(4, 2)
    self.assertEqual(four, HASH(4))
    self.assertEqual(five, HASH(5))

def test_slice(self):
    pyfs = opener.opendir(settings.config['fs_path'])
    scroll_id = sha1_hexdigest
    s = scroll.Scroll(pyfs, scroll_id)
    items = []
    for i in range(10):
        item_hash = bytes_hash_int(i)
        s.add(item_hash)
        items.append(item_hash)
    four, five = s.slice(4, 2)
    self.assertEqual(four, bytes_hash_int(4))
    self.assertEqual(five, bytes_hash_int(5))

def test_put_error(client, bucket):
    """Test upload - cancelled by user."""
    object_url = url_for('invenio_files_rest.object_api',
                         bucket_id=bucket.id, key='test.txt')
    pytest.raises(ValueError, client.put, object_url,
                  input_stream=BadBytesIO(b'a' * 128))
    assert FileInstance.query.count() == 0
    assert ObjectVersion.query.count() == 0
    # Ensure that the file was removed.
    assert len(list(opener.opendir(bucket.location.uri).walk('.'))) == 3

def test_load(self):
    scroll_id = 'test_load'
    pyfs = opener.opendir(settings.config['fs_path'])
    data1 = HASH('2')
    data2 = HASH('3')
    with open(scroll_id + '.scroll', 'w+') as f:
        f.write(data1 + data2)
    s = scroll.Scroll(pyfs, scroll_id)
    self.assertTrue(s.has(data1))
    self.assertTrue(s.has(data2))
    self.assertFalse(s.has('1'))
    self.assertFalse(s.has(''))
    expected_state = HASH(HASH(HASH(scroll_id) + data1) + data2)
    self.assertEqual(s.state, expected_state)

def save(self, incoming_stream, chunk_size=None, progress_callback=None):
    """Save file in the file system."""
    fs = opener.opendir(self.file.uri or self.make_path(), create_dir=True)
    fp = fs.open(self.filename, 'wb')
    try:
        bytes_written, checksum = self._write_stream(
            incoming_stream, fp, chunk_size=chunk_size,
            progress_callback=progress_callback)
    finally:
        fp.close()
    uri = fs.getpathurl(self.filename, allow_none=True) or \
        fs.getsyspath(self.filename, allow_none=True)
    return uri, bytes_written, checksum

def test_save_big_scroll(self):
    pyfs = opener.opendir(settings.config['fs_path'])
    scroll_id = sha1_hexdigest
    s = scroll.Scroll(pyfs, scroll_id)
    items = []
    for i in range(100):
        item_hash = bytes_hash_int(i)
        s.add(item_hash)
        items.append(item_hash)
    for item_hash in items:
        self.assertTrue(s.has(item_hash))
    contents = pyfs.getcontents(scroll_id + '.scroll')
    self.assertEqual(contents, ''.join(items))

def test_add(self): my_data = "B"*20 pyfs = opener.opendir(settings.config['fs_path']) scroll_id = sha1_hexdigest chissel_set_id = 'spam' fingerprint = notary.Notary.generate(pyfs) chisel_set = notary.ChiselSet(pyfs, chissel_set_id, fingerprint) hash_bytes = chisel_set.add(my_data) self.assertEqual(chisel_set.pool.get(hash_bytes), my_data) self.assertTrue(chisel_set.scroll.has(hash_bytes)) self.assertTrue(chisel_set.has(hash_bytes))
def test_add_many(self):
    pyfs = opener.opendir(settings.config['fs_path'])
    scroll_id = sha1_hexdigest
    s = scroll.Scroll(pyfs, scroll_id)
    items = []
    for i in range(100):
        item_hash = HASH(i)
        s.add(item_hash)
        items.append(item_hash)
    for item_hash in items:
        self.assertTrue(s.has(item_hash))
    contents = pyfs.getcontents(scroll_id + '.scroll')
    self.assertEqual(contents, ''.join(items))

def test_add(self): my_data = "B" * 20 pyfs = opener.opendir(settings.config['fs_path']) scroll_id = sha1_hexdigest chissel_set_id = 'spam' fingerprint = notary.Notary.generate(pyfs) chisel_set = notary.ChiselSet(pyfs, chissel_set_id, fingerprint) hash_bytes = chisel_set.add(my_data) self.assertEqual(chisel_set.pool.get(hash_bytes), my_data) self.assertTrue(chisel_set.scroll.has(hash_bytes)) self.assertTrue(chisel_set.has(hash_bytes))
def test_load_artificial(self):
    """This test writes its own scroll to test scroll loading."""
    scroll_id = 'test_load'
    data1 = HASH('2')
    data2 = HASH('3')
    expected_state = HASH(HASH(HASH(scroll_id) + data1) + data2)
    pyfs = opener.opendir(settings.config['fs_path'])
    with open(scroll_id + '.scroll', 'w+') as f:
        f.write(data1 + data2)
    with open(scroll_id + '.state', 'w') as f:
        f.write(expected_state)
    s = scroll.Scroll(pyfs, scroll_id)
    self.assertTrue(s.has(data1))
    self.assertTrue(s.has(data2))
    self.assertFalse(s.has('1'))
    self.assertFalse(s.has(''))
    self.assertEqual(s.state, expected_state)

def path_to_fs(path):
    """Convert path to an FS object."""
    return opener.opendir(path)

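# Hedged usage sketch for path_to_fs: opener.opendir returns a pyfilesystem
# FS object rooted at the given directory, so the result supports listdir(),
# open(), exists(), and similar FS methods. The path below is hypothetical.
archive = path_to_fs('/tmp/archive')
entries = archive.listdir()  # names of entries directly under /tmp/archive
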
def archive_fs(locations): """File system for the archive location.""" archive_path = locations['archive'] fs = opener.opendir(archive_path, writeable=False, create_dir=True) return fs
def loadNotary():
    pyfs = opener.opendir(settings.config['fs_path'])
    chisel_set_id = 'spam'
    notary_fingerprint = notary.Notary.generate(pyfs)
    return notary.Notary(SubscribeHandler, pyfs, notary_fingerprint)

def test_generate(self):
    pyfs = opener.opendir(settings.config['fs_path'])
    fingerprint = notary.Notary.generate(pyfs)
    publisher = None
    n = notary.Notary(publisher, pyfs, fingerprint)

def get_sample_project_mapping(basepath, samplesheet=None,
                               suffix='.fastq.gz', absolute_paths=False,
                               catch_undetermined=True):
    """
    Given a path containing fastq.gz files, possibly nested in
    Project/Sample directories, return a data structure mapping
    fastq samples to projects.

    TODO: The SampleSheet.csv may be used as a hint but is not required.

    :param basepath: Path to a directory tree of fastq.gz files - eg,
                     a bcl2fastq output directory
    :type basepath: str
    :return: Dictionary of lists, {project_id: [relative fastq.gz paths]}
    :rtype: OrderedDict
    """
    from fs.opener import opener

    fq_files = []
    with opener.opendir(basepath) as vfs:
        for fn in vfs.walkfiles():
            if suffix in fn:
                fq_files.append(fn.lstrip('/').lstrip('\\'))

    fq_files = sorted(fq_files)

    project_mapping = OrderedDict()
    for fqpath in fq_files:
        project = ''
        fqfile = fqpath
        parts = Path(fqpath).parts
        if len(parts) == 3:
            project, sample_id, fqfile = map(str, parts)
        if len(parts) == 2:
            project, fqfile = map(str, parts)
        if len(parts) == 1:
            fqfile = str(parts[0])

        if catch_undetermined and 'Undetermined' in fqfile:
            project = u'Undetermined_indices'

        # TODO: we currently don't deal with Project_ prefixes, really
        #       the project ID doesn't include Project_. If we strip
        #       this here, maybe we need to include the project directory
        #       in the fastq paths so we can know the path and project id
        #       - will require fixes to downstream code that
        #       does join(bcl2fastq_output_dir, project_id, fastq_file)

        # TODO: also incorporate sample_id in this datastructure
        if project not in project_mapping:
            project_mapping[project] = []
        if absolute_paths:
            fqpath = join(basepath, fqpath)
        project_mapping[project].append(fqpath)

    # TODO: Use the SampleSheet.csv to validate or hint
    # TODO: Also assign sample_id, sample_name, lane, read, etc
    #       we could use parse_sample_info_from_filename for this,
    #       and/or use the FASTQ header(s)
    return project_mapping

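# Hedged usage sketch for get_sample_project_mapping. The run path and the
# project/sample names below are hypothetical; given a bcl2fastq-style output
# tree such as
#   ProjectA/Sample1/S1_R1_001.fastq.gz
#   Undetermined_S0_R1_001.fastq.gz
# the call returns an OrderedDict mapping project IDs to relative fastq paths:
#   OrderedDict([('ProjectA', ['ProjectA/Sample1/S1_R1_001.fastq.gz']),
#                ('Undetermined_indices', ['Undetermined_S0_R1_001.fastq.gz'])])
mapping = get_sample_project_mapping('/data/runs/run001/bcl2fastq_output')
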
def _make_bag(self, **kwargs):
    """Turn a folder into bagit form and compress it."""
    opener.opendir(self.folder, create_dir=True)
    info = {'Bagging-Date': datetime.now().strftime("%Y-%m-%d")}
    info.update(kwargs)
    return make_bag(self.folder, info)

def test_opendir(tmppath):
    """Test opendir."""
    rooturl = mkurl(tmppath)
    fs = opener.opendir(rooturl + "/data")
    assert fs.listdir()

        count, new = 0, 0
        for dirPath, filename in remote.walkFiles():
            count += 1
            if not self.has(filename, 0):
                new += 1
                self.put(remote._fs.getcontents("%s/%s" % (dirPath, filename)))
        print "Saw %s files; copied %s that are new" % (count, new)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print "usage: %s [put|ls|cat|sync|test]" % os.path.basename(sys.argv[0])
    else:
        cmd = sys.argv[1]
        if cmd == 'test':
            print doctest.testmod()
        else:
            from fs.opener import opener
            chisel = Chisel(opener.opendir(sys.argv[2]))
            if cmd == 'put':
                chisel.put(sys.stdin.read(), *map(int, sys.argv[3:]))
            elif cmd == 'ls':
                print "\n".join("%s/%s" % (a, b)
                                for a, b in chisel.walkFiles(*map(int, sys.argv[3:]))),
            elif cmd == 'cat':
                print "".join(chisel.get(*map(int, sys.argv[3:]))),
            elif cmd == 'sync':
                chisel.sync(Chisel(opener.opendir(sys.argv[3])),
                            *map(int, sys.argv[4:]))
            else:
                print "unknown command %s" % (cmd,)