コード例 #1
0
ファイル: test.py プロジェクト: rmp/picopore
def recursiveCheckEquivalent(file1, file2, name):
    """Recursively compare the object at path ``name`` in two open files.

    Attributes are compared first. Groups are then descended into, while
    anything else has its shape and data compared. Every difference is
    reported through ``log``; nothing is returned.

    Args:
        file1: first open file, indexable by path (``file1[name]``).
        file2: second open file, indexed with the same paths.
        name: path of the group or dataset to compare.
    """
    if name not in file2:
        # The callers in this file run checkContents() on the parent before
        # descending, so a path missing from file 2 has already been
        # reported; there is nothing left to compare here.
        return
    obj1 = file1[name]
    # Fixed: this used to read file1[name], so file 1 was compared with itself
    # and no difference could ever be detected.
    obj2 = file2[name]
    # check attributes
    attr1 = obj1.attrs
    attr2 = obj2.attrs
    attrsName = "/".join([name, "attrs"])
    checkContents(attr1, attr2, attrsName)
    for key, value in attr1.items():
        if key not in attr2:
            # checkContents() above is responsible for reporting missing keys
            continue
        if not attr2[key] == value:
            log("Failure: {} - file1={}, file2={}".format(
                "/".join([attrsName, key]), value, attr2[key]))
    # check subgroups / datasets
    if isGroup(obj1):
        checkContents(obj1, obj2)
        for key in obj1.keys():
            recursiveCheckEquivalent(file1, file2, "/".join([name, key]))
    else:
        if not obj1.shape == obj2.shape:
            log("Failure: {}.shape - file1={}, file2={}".format(
                name, obj1.shape, obj2.shape))
        if obj1.dtype.names is None:
            # just one column
            checkData(obj1, obj2, name)
        else:
            # compound dtype: compare each named column separately
            for col in obj1.dtype.names:
                checkData(obj1[col], obj2[col], ".".join([name, col]))
コード例 #2
0
    def execute(self):
        """Run a forward pass, a revert pass, then verify the round trip.

        The first ``run()`` is made with ``self.revert = False``; the second
        with ``self.revert = True`` on the list from
        ``getReversionFileList()``. Each resulting file is then compared with
        the matching entry of ``self.originalFileList``. The files in
        ``self.fileList`` are removed afterwards, whether or not the run
        succeeded.

        Returns:
            0 if ``self.fileList`` is empty or every comparison reported no
            errors; otherwise a non-zero value (the summed
            ``checkEquivalent`` results, and at least 1 when an exception
            interrupted the run).
        """
        exitcode = 1
        if len(self.fileList) == 0:
            return 0
        try:
            self.revert = False
            self.run()

            self.preSize = 0
            self.postSize = 0
            self.revert = True
            self.fileList = self.getReversionFileList()
            self.prefix = None
            self.run()
            exitcode = 0
            # Index into originalFileList (rather than zip) so that a length
            # mismatch still surfaces as an error instead of being truncated.
            for i, reverted in enumerate(self.fileList):
                exitcode += checkEquivalent(self.originalFileList[i], reverted)
        except Exception as e:
            log("ERROR: " + str(e))
            # Fixed: exitcode is reset to 0 just before verification, so an
            # exception raised while verifying used to be reported as success.
            exitcode = max(exitcode, 1)
        finally:
            for f in self.fileList:
                try:
                    os.remove(f)
                except OSError:
                    # file never created
                    pass
        return exitcode
コード例 #3
0
ファイル: test.py プロジェクト: rmp/picopore
def checkEquivalent(fn1, fn2):
    """Open two files read-only and log every difference between them.

    Args:
        fn1: path of the first file ("file 1" in the log output).
        fn2: path of the second file ("file 2" in the log output).
    """
    banner = "Checking equivalence of {} (file 1) and {} (file 2)...".format(
        fn1, fn2)
    log(banner)
    with h5py.File(fn1, 'r') as file1:
        with h5py.File(fn2, 'r') as file2:
            # top-level keys first, then each top-level object in depth
            checkContents(file1, file2)
            for topLevel in file1.values():
                recursiveCheckEquivalent(file1, file2, topLevel.name)
    log("Complete.")
コード例 #4
0
def checkEquivalent(fn1, fn2):
    """Compare two files and return the number of errors reported.

    Args:
        fn1: path of the first file ("file 1" in the log output).
        fn2: path of the second file ("file 2" in the log output).

    Returns:
        The result of ``checkContents`` on the two files plus the results of
        ``recursiveCheckEquivalent`` for every top-level object of file 1.
    """
    banner = "Checking equivalence of {} (file 1) and {} (file 2)...".format(fn1, fn2)
    log(banner)
    with h5py.File(fn1, 'r') as file1:
        with h5py.File(fn2, 'r') as file2:
            exitcode = checkContents(file1, file2)
            # accumulate the per-object error counts, in file order
            exitcode += sum(
                recursiveCheckEquivalent(file1, file2, topLevel.name)
                for topLevel in file1.values())
    log("Complete with {} errors.".format(exitcode))
    return exitcode
コード例 #5
0
def runRealtime(args):
    """Create a ``ReadsFolder`` for ``args`` and idle until Ctrl-C.

    ``ReadsFolder`` is imported inside the function rather than at module
    level. Once a KeyboardInterrupt arrives, a message is logged and the
    folder is stopped.

    Args:
        args: passed unchanged to ``ReadsFolder``.
    """
    from picopore.realtime import ReadsFolder

    watcher = ReadsFolder(args)
    try:
        # Nothing to do in this thread; just wait for the interrupt.
        while True:
            sleep(1)
    except KeyboardInterrupt:
        log("\nExiting Picopore.")
    watcher.stop()
コード例 #6
0
ファイル: test.py プロジェクト: rmp/picopore
def checkData(data1, data2, name):
    """Log every position at which two data containers differ.

    Args:
        data1: values from file 1; must support ``==``, ``len()`` and
            integer indexing.
        data2: values from file 2, compared position by position.
        name: label used as the prefix of each failure message.
    """
    match = data1 == data2
    if isArray(match):
        # element-wise comparison result: all elements must agree
        match = match.all()
    if match:
        return
    for pos in range(len(data1)):
        # Fixed: the element from file 1 used to be compared against the
        # whole of data2 instead of the element at the same position.
        same = data1[pos] == data2[pos]
        if isArray(same):
            # the element is itself array-like (e.g. a row of a
            # multi-dimensional dataset): it matches only if all entries do
            same = same.all()
        if not same:
            log("Failure: {}[{}] - file1={}, file2={}".format(
                name, pos, data1[pos], data2[pos]))
コード例 #7
0
 def execute(self):
     """Start the reads folder and idle until Ctrl-C, then stop it.

     Returns:
         0, once the folder has been stopped.
     """
     watcher = self.readsFolder
     watcher.start()
     try:
         # This thread only waits for the interrupt.
         while True:
             sleep(5)
     except KeyboardInterrupt:
         log("\nExiting Picopore.")
     watcher.stop()
     return 0
コード例 #8
0
    def stop(self):
        """Stop the runner, then stop and join the observer.

        A KeyboardInterrupt raised while the runner is stopping is caught and
        logged as "Aborted."; the observer is shut down either way.
        """
        log("Processing in-progress files. Press Ctrl-C again to abort.")
        try:
            self.runner.stop()
        except KeyboardInterrupt:
            # second Ctrl-C: stop waiting for the runner
            log("Aborted.")

        watcher = self.observer
        watcher.stop()
        watcher.join()
コード例 #9
0
ファイル: runner.py プロジェクト: touala/picopore
def _process_func(filename, func, prefix, print_every):
    """Apply ``func`` to a file, optionally working on a prefixed copy.

    Args:
        filename: path of the input file.
        func: callable invoked with the path of the file to process.
        prefix: when not None, the file is first copied to
            ``getPrefixedFilename(filename, prefix)`` and ``func`` is applied
            to the copy; otherwise ``func`` is applied to ``filename``.
        print_every: when positive, a '.' is logged with probability
            ``1 / print_every`` for each non-None result.

    Returns:
        Whatever ``func`` returned.
    """
    target = filename
    if prefix is not None:
        target = getPrefixedFilename(filename, prefix)
        copyfile(filename, target)
    result = func(target)
    # No progress output for None results or when printing is disabled.
    if result is None or not print_every > 0:
        return result
    if np.random.rand() < 1.0 / print_every:
        log('.', end='')
    return result
コード例 #10
0
def checkRealtime(args):
    """Announce real-time mode and confirm it with the user when needed.

    Args:
        args: object with ``realtime``, ``mode`` and ``y`` attributes;
            ``y`` is set to True once ``checkSure()`` confirms.

    Returns:
        None when ``args.realtime`` is falsy; True when ``args.y`` is already
        set or ``checkSure()`` confirms; False otherwise.
    """
    if not args.realtime:
        return None
    log("Performing real time {} compression. ".format(args.mode), end='')
    if args.y:
        # finish the line that the log call above left open
        print('')
        return True
    if checkSure():
        args.y = True
        return True
    return False
コード例 #11
0
ファイル: test.py プロジェクト: rmp/picopore
def checkContents(obj1, obj2, name=None):
    """Log the keys that are present in only one of two containers.

    Args:
        obj1: container from file 1; must expose ``keys()``.
        obj2: container from file 2; must expose ``keys()``.
        name: path used as the prefix in failure messages. Defaults to
            ``obj1.name`` when omitted.
    """
    # Fixed: `name` used to be computed and then ignored, with the messages
    # reading obj1.name / obj2.name instead. That made the explicit `name`
    # argument useless and requires a `.name` attribute even on containers
    # that are passed with an explicit name (as the attribute containers
    # are elsewhere in this file).
    name = obj1.name if name is None else name
    keys1 = obj1.keys()
    keys2 = obj2.keys()
    for key in keys1:
        if key not in keys2:
            log("Failure: {} missing from file 2".format("/".join(
                [name, key])))
    for key in keys2:
        if key not in keys1:
            log("Failure: {} missing from file 1".format("/".join(
                [name, key])))
コード例 #12
0
ファイル: compress.py プロジェクト: touala/picopore
def compress(func, filename, group="all"):
    """Apply ``func`` to an HDF5 file, then repack it in place with h5repack.

    Args:
        func: callable taking the open file and ``group``; its return value
            is passed to ``h5repack -f`` as the filter specification.
        filename: path of the file to modify and repack.
        group: forwarded unchanged to ``func``.

    Returns:
        The size of ``filename`` in bytes after the attempt, whether or not
        it succeeded.
    """
    tmpFilename = "{}.tmp".format(filename)
    try:
        with h5py.File(filename, 'r+') as f:
            filtr = func(f, group)
        # Fixed: exit statuses used to be ignored, so a failed h5repack could
        # still have its (missing or partial) output moved over the input.
        # check_call raises on a non-zero status, which routes the failure
        # into the handler below and leaves the input file untouched.
        subprocess.check_call(
            ["h5repack", "-f", filtr, filename, tmpFilename])
        subprocess.check_call(["mv", tmpFilename, filename])
        return os.path.getsize(filename)
    except Exception as e:
        log("ERROR: {} on file {}".format(str(e), filename))
        # do not leave a partial repack behind
        if os.path.isfile(tmpFilename):
            os.remove(tmpFilename)
        return os.path.getsize(filename)
コード例 #13
0
ファイル: compress.py プロジェクト: rmp/picopore
def compress(func, filename, group="all", prefix=None):
	"""Apply ``func`` to an HDF5 file, then repack it with h5repack.

	Args:
		func: callable taking the open file and ``group``; its return value
			is passed to ``h5repack -f`` as the filter specification.
		filename: path of the input file.
		group: forwarded unchanged to ``func``.
		prefix: when not None, the input is first copied to
			``getPrefixedFilename(filename, prefix)`` and the copy is
			processed instead of the original.
	"""
	# Local import: the rest of this block did not previously rely on `os`.
	import os

	if prefix is not None:
		newFilename = getPrefixedFilename(filename, prefix)
		copyfile(filename, newFilename)
	else:
		newFilename = filename
	tmpFilename = "{}.tmp".format(newFilename)
	try:
		with h5py.File(newFilename, 'r+') as f:
			filtr = func(f, group)
		# Fixed: exit statuses used to be ignored, so a failed h5repack could
		# still have its (missing or partial) output moved over the file.
		# check_call raises on a non-zero status instead.
		subprocess.check_call(["h5repack", "-f", filtr, newFilename, tmpFilename])
		subprocess.check_call(["mv", tmpFilename, newFilename])
	except Exception as e:
		log(str(e))
		# do not leave a partial repack behind
		if os.path.isfile(tmpFilename):
			os.remove(tmpFilename)
コード例 #14
0
ファイル: runner.py プロジェクト: touala/picopore
 def postprocess(self, results):
     """Store the summed results and log a two-line size comparison.

     Args:
         results: iterable of numbers; their sum becomes ``self.postSize``.

     Returns:
         ``self.preSize``, unchanged.
     """
     self.postSize = sum(results)
     if self.revert:
         labels = ("Compressed size:", "Reverted size:")
     else:
         labels = ("Original size:", "Compressed size:")
     sizes = (self.preSize, self.postSize)
     # pad labels to a common width and right-align the numbers
     str_len = max(len(label) for label in labels) + 1
     num_len = len(str(max(sizes)))
     for label, size in zip(labels, sizes):
         log("{}{}".format(label.ljust(str_len), str(size).rjust(num_len)))
     return self.preSize
コード例 #15
0
ファイル: rename.py プロジェクト: touala/picopore
def rename(filename, pattern, replacement):
    """Rename every matching path inside an HDF5 file.

    Each path yielded by ``findDatasets`` is rewritten with
    ``re.sub(pattern, replacement, path)``; the object is linked under the
    new path and the old path is deleted.

    Args:
        filename: path of the file to modify in place.
        pattern: regular expression, also passed to ``findDatasets`` as its
            keyword.
        replacement: replacement text for ``re.sub``.

    Returns:
        0 on success, 1 if any exception was raised (it is logged).
    """
    try:
        with h5py.File(filename, 'r+') as handle:
            matches = findDatasets(handle,
                                   entry_point="/",
                                   keyword=pattern,
                                   match_child=True)
            for oldPath in matches:
                newPath = re.sub(pattern, replacement, oldPath)
                # link under the new name first, then drop the old one
                handle[newPath] = handle[oldPath]
                del handle[oldPath]
                log("Renamed {} to {}".format(oldPath, newPath))
    except Exception as e:
        log("ERROR: {} on file {}".format(str(e), filename))
        return 1
    return 0
コード例 #16
0
ファイル: realtime.py プロジェクト: rmp/picopore
	def __init__(self, args):
		"""Watch the input directories for new .fast5 files, then process existing ones.

		Every entry of ``args.input`` that is a directory is scheduled
		recursively on the observer. The observer is started and ``run`` is
		then called with the settings taken from ``args``.

		Args:
			args: object providing ``input``, ``revert``, ``mode``, ``y``,
				``threads``, ``group``, ``prefix``, ``fastq`` and ``summary``.
		"""
		self.args = args
		self.event_handler = PatternMatchingEventHandler(patterns=["*.fast5"],
				ignore_patterns=[],
				ignore_directories=True)
		self.event_handler.on_created = self.on_created
		self.observer = Observer()
		observedPaths = []
		for path in args.input:
			if os.path.isdir(path):
				self.observer.schedule(self.event_handler, path, recursive=True)
				observedPaths.append(path)
		# Fixed: the message used to list every input, including entries that
		# are not directories and were therefore never scheduled; the
		# collected observedPaths list was unused.
		log("Monitoring {} in real time. Press Ctrl+C to exit.".format(", ".join(observedPaths)))
		self.observer.start()
		run(args.revert, args.mode, args.input, args.y, args.threads, args.group, args.prefix, args.fastq, args.summary)
コード例 #17
0
 def __init__(self, args):
     """Initialise the runner, confirm with the user, and create the reads folder.

     The description returned by ``chooseCompressFunc`` (with
     ``realtime=True``) is logged. Unless ``self.y`` is already set,
     ``checkSure()`` must confirm; otherwise the process exits with status 1.

     Args:
         args: forwarded to the parent class initialiser.
     """
     super(PicoporeRealtimeRunner, self).__init__(args)
     # only the description is needed here; the function itself is discarded
     description = chooseCompressFunc(self.revert,
                                      self.mode,
                                      self.fastq,
                                      self.summary,
                                      self.manual,
                                      realtime=True)[1]
     log(description + "...", end='')
     if not self.y:
         if checkSure():
             self.y = True
         else:
             exit(1)
     else:
         log()
     self.readsFolder = ReadsFolder(self)
コード例 #18
0
ファイル: compress.py プロジェクト: touala/picopore
def chooseCompressFunc(revert, mode, fastq, summary, manual, realtime=False):
    """Select the (de)compression routine and a description of it.

    Args:
        revert: when true, choose a decompression routine.
        mode: 'lossless', 'deep-lossless' or 'raw' ('raw' cannot be
            reverted).
        fastq: raw mode only; affects which keyword lists are added.
        summary: raw mode only; affects which keyword lists are added.
        manual: raw mode only; when not None, used as the sole keyword.
        realtime: when true, "real time " is inserted into the description.

    Returns:
        A tuple ``(partial(compress, func), name)`` where ``name`` is the
        human-readable description starting with "Performing ".

    The process exits with status 1 when a revert is requested for a mode
    other than the two lossless ones, or when no routine matches ``mode``.
    """
    name = "Performing "
    if realtime:
        name += "real time "
    # Explicit sentinel instead of relying on NameError for an unbound local.
    func = None
    if revert:
        if mode == 'lossless':
            func = losslessDecompress
            name += "lossless decompression"
        elif mode == 'deep-lossless':
            func = deepLosslessDecompress
            name += "deep lossless decompression"
        else:
            log("Unable to revert raw files. Please use a basecaller instead.")
            exit(1)
    else:
        if mode == 'lossless':
            func = losslessCompress
            name += "lossless compression"
        elif mode == 'deep-lossless':
            func = deepLosslessCompress
            name += "deep lossless compression"
        elif mode == 'raw':
            name += "raw compression "
            if manual is not None:
                name += "with manual keyword " + manual
                keywords = [manual]
            else:
                # Fixed: work on a copy. Binding the module-level list
                # directly meant the `+=` below extended it in place, so the
                # keyword list grew with every call.
                keywords = list(__raw_compress_keywords__)
                if fastq and summary:
                    name += "with FASTQ and summary"
                elif fastq:
                    keywords += __raw_compress_summary__
                    name += "with FASTQ and no summary"
                elif summary:
                    keywords += __raw_compress_fastq__
                    name += "with summary and no FASTQ"
                else:
                    keywords += __raw_compress_fastq_summary__
                    name += "with no summary and no FASTQ"
            func = partial(rawCompress, keywords=keywords)
    if func is None:
        log("No compression method selected")
        exit(1)
    return partial(compress, func), name
コード例 #19
0
ファイル: compress.py プロジェクト: touala/picopore
def rawCompress(f, group, keywords):
    """Delete the datasets matching ``keywords`` from an open file.

    Nothing is deleted when the file contains a "Picopore" entry; a message
    asking the user to revert first is logged instead. After deletion, an
    "Analyses" entry that has become empty is removed as well.

    Args:
        f: open file supporting ``in``, indexing and ``del``.
        group: forwarded to ``findDatasets``.
        keywords: iterable of keywords, each passed to ``findDatasets``.

    Returns:
        The string "GZIP=9" in every case.
    """
    if "Picopore" in f:
        # Fixed: the command name in this instruction was misspelled
        # ("picpore"), so following it verbatim would not work.
        log("{} is compressed using picopore deep-lossless compression. Please use picopore --revert --mode deep-lossless before attempting raw compression."
            .format(f.filename))
    else:
        paths = []
        for kw in keywords:
            paths.extend(findDatasets(f, group, keyword=kw))
        for path in paths:
            # a path may already be gone if an enclosing path was deleted
            if path in f:
                del f[path]
        try:
            # len() rather than truthiness: emptiness is what matters here
            if len(f["Analyses"]) == 0:
                del f["Analyses"]
        except KeyError:
            # no analyses, no worries
            pass
    return "GZIP=9"
コード例 #20
0
ファイル: runner.py プロジェクト: touala/picopore
 def run(self, postprocess=True):
     """Process every file from ``getFileList()`` after user confirmation.

     Args:
         postprocess: when true, finish through ``self.stop()``; otherwise
             return the result of ``self.multiprocessor.wait()``.

     Returns:
         0 when there are no files; otherwise the value of ``self.stop()``
         or ``self.multiprocessor.wait()``. The process exits with status 1
         if the user declines.
     """
     func, message = self.get_func()
     # bind the per-run settings so that workers only need the filename
     self.func = functools.partial(_process_func,
                                   func=func,
                                   prefix=self.prefix,
                                   print_every=self.print_every)
     fileList = self.getFileList()
     if len(fileList) == 0:
         return 0
     log("{} on {} files... ".format(message, len(fileList)))
     confirmed = self.y or checkSure()
     if not confirmed:
         log("User cancelled. Exiting.")
         exit(1)
     else:
         self.process(fileList)
         if not postprocess:
             return self.multiprocessor.wait()
         return self.stop()
コード例 #21
0
    def __init__(self, runner):
        """Set up an observer over the runner's input directories.

        Every entry of ``runner.input`` that is a directory is scheduled
        recursively and recorded in ``self.observedPaths``. The observer is
        created here but not started.

        Args:
            runner: object providing ``input``; stored as ``self.runner``.
        """
        self.runner = runner

        handler = self.event_handler = PatternMatchingEventHandler(
            patterns=["*.fast5"], ignore_patterns=[], ignore_directories=True)
        handler.on_created = self.on_created
        handler.on_moved = self.on_moved

        self.observer = Observer()

        self.observedPaths = []
        for path in self.runner.input:
            if not os.path.isdir(path):
                # only directories are watched
                continue
            self.observer.schedule(handler, path, recursive=True)
            self.observedPaths.append(path)
        log("Monitoring {} in real time. Press Ctrl+C to exit.".format(
            ", ".join(self.observedPaths)))
コード例 #22
0
def run(revert,
        mode,
        inp,
        y=False,
        threads=1,
        group="all",
        prefix=None,
        fastq=True,
        summary=False):
    """Process every file found under ``inp`` and log the size change.

    Args:
        revert: passed to ``chooseCompressFunc``; also selects the labels of
            the size report.
        mode: passed to ``chooseCompressFunc``.
        inp: passed to ``recursiveFindFast5`` to build the file list.
        y: when true, skip the ``checkSure()`` confirmation.
        threads: number of worker processes; values <= 1 run serially.
        group: forwarded to each ``compressWrapper`` call.
        prefix: forwarded to each ``compressWrapper`` call and used to
            locate the output files when measuring the resulting size.
        fastq: passed to ``chooseCompressFunc``.
        summary: passed to ``chooseCompressFunc``.

    Returns:
        0 when there are no files or processing completed. The process exits
        with status 1 if the user declines.
    """
    func, message = chooseCompressFunc(revert, mode, fastq, summary)
    fileList = recursiveFindFast5(inp)
    if len(fileList) == 0:
        return 0
    preSize = sum(os.path.getsize(f) for f in fileList)
    log("{} on {} files... ".format(message, len(fileList)))
    if y or checkSure():
        argList = [[func, f, group, prefix] for f in fileList]
        if threads <= 1:
            for jobArgs in argList:
                compressWrapper(jobArgs)
        else:
            pool = Pool(threads)
            try:
                pool.map(compressWrapper, argList)
            finally:
                # Fixed: the pool was never shut down, leaving its worker
                # processes alive after the work was done.
                pool.close()
                pool.join()
        if revert:
            preStr, postStr = "Compressed size:", "Reverted size:"
        else:
            preStr, postStr = "Original size:", "Compressed size:"
        log("Complete.")
        postSize = sum(
            os.path.getsize(getPrefixedFilename(f, prefix)) for f in fileList)
        # pad labels to a common width and right-align the numbers
        str_len = max(len(preStr), len(postStr)) + 1
        num_len = len(str(max(preSize, postSize)))
        log("{}{}".format(preStr.ljust(str_len), str(preSize).rjust(num_len)))
        log("{}{}".format(postStr.ljust(str_len),
                          str(postSize).rjust(num_len)))
        return 0
    else:
        log("User cancelled. Exiting.")
        exit(1)
コード例 #23
0
def recursiveCheckEquivalent(file1, file2, name):
    """Recursively compare the object at path ``name`` in two open files.

    Attributes are compared first. Groups are then descended into, while
    anything else has its shape and data compared. Every difference is
    logged and counted.

    Args:
        file1: first open file, indexable by path (``file1[name]``).
        file2: second open file, indexed with the same paths.
        name: path of the group or dataset to compare.

    Returns:
        The number of failures found at ``name`` and below, including the
        values returned by ``checkContents`` and ``checkData``.
    """
    if name not in file2:
        # The callers in this file run checkContents() on the parent before
        # descending, so a path missing from file 2 has already been
        # reported there; nothing more can be compared.
        return 0
    obj1 = file1[name]
    # Fixed: this used to read file1[name], so file 1 was compared with itself
    # and no difference could ever be detected.
    obj2 = file2[name]
    # check attributes
    attr1 = obj1.attrs
    attr2 = obj2.attrs
    attrsName = "/".join([name, "attrs"])
    exitcode = checkContents(attr1, attr2, attrsName)
    for key, value in attr1.items():
        if key not in attr2:
            # checkContents() above is responsible for reporting missing keys
            continue
        try:
            same = attr2[key] == value
            if hasattr(same, "all"):
                # element-wise result (e.g. a numpy array): all must match.
                # This replaces matching on the text of a ValueError message.
                same = same.all()
        except ValueError:
            # values that cannot be compared element-wise are treated as
            # different rather than being silently skipped
            same = False
        if not same:
            log("Failure: {} - file1={}, file2={}".format("/".join([attrsName, key]), value, attr2[key]))
            exitcode += 1
    # check subgroups / datasets
    if isGroup(obj1):
        exitcode += checkContents(obj1, obj2)
        for key in obj1.keys():
            exitcode += recursiveCheckEquivalent(file1, file2, "/".join([name, key]))
    else:
        if not obj1.shape == obj2.shape:
            log("Failure: {}.shape - file1={}, file2={}".format(name, obj1.shape, obj2.shape))
            # Fixed: a shape mismatch was logged but never counted.
            exitcode += 1
        if obj1.dtype.names is None:
            # just one column
            exitcode += checkData(obj1, obj2, name)
        else:
            # compound dtype: compare each named column separately
            for col in obj1.dtype.names:
                exitcode += checkData(obj1[col], obj2[col], ".".join([name, col]))
    return exitcode
コード例 #24
0
ファイル: compress.py プロジェクト: rmp/picopore
def chooseCompressFunc(revert, mode, fastq, summary):
	"""Select the (de)compression routine and a description of it.

	Args:
		revert: when true, choose a decompression routine.
		mode: 'lossless', 'deep-lossless' or 'raw' ('raw' cannot be
			reverted).
		fastq: raw mode only; selects the raw compression variant.
		summary: raw mode only; selects the raw compression variant.

	Returns:
		A tuple ``(func, name)``. The process exits with status 1 when a
		revert is requested for any other mode, or when no routine matches
		``mode``.
	"""
	# None means "nothing matched"; handled once at the end
	selection = None
	if revert:
		if mode == 'lossless':
			selection = (losslessDecompress, "Performing lossless decompression")
		elif mode == 'deep-lossless':
			selection = (deepLosslessDecompress, "Performing deep lossless decompression")
		else:
			log("Unable to revert raw files. Please use a basecaller instead.")
			exit(1)
	elif mode == 'lossless':
		selection = (losslessCompress, "Performing lossless compression")
	elif mode == 'deep-lossless':
		selection = (deepLosslessCompress, "Performing deep lossless compression")
	elif mode == 'raw':
		if fastq and summary:
			selection = (rawCompressFastqSummary, "Performing raw compression with FASTQ and summary")
		elif fastq:
			selection = (rawCompressFastqNoSummary, "Performing raw compression with FASTQ and no summary")
		elif summary:
			selection = (rawCompressSummaryNoFastq, "Performing raw compression with summary and no FASTQ")
		else:
			selection = (rawCompressMinimal, "Performing raw compression with no summary and no FASTQ")
	if selection is None:
		log("No compression method selected")
		exit(1)
	return selection
コード例 #25
0
ファイル: runner.py プロジェクト: touala/picopore
 def stop(self):
     """Join the multiprocessor, log completion, and post-process the results.

     Returns:
         Whatever ``self.postprocess`` returns for the joined results.
     """
     outcome = self.multiprocessor.join()
     log("Complete.")
     return self.postprocess(outcome)
コード例 #26
0
ファイル: rename.py プロジェクト: touala/picopore
 def postprocess(self, results):
     """Log how many files were renamed successfully.

     Args:
         results: iterable of numbers whose sum is subtracted from
             ``self.processed`` to obtain the success count.

     Returns:
         ``self.processed``.
     """
     total = self.processed
     failed = sum(results)
     summary = "Successfully renamed {} of {} files.".format(
         total - failed, total)
     log(summary)
     return self.processed
コード例 #27
0
ファイル: __init__.py プロジェクト: touala/picopore
from __future__ import absolute_import
from subprocess import call, PIPE
from picopore.util import log

# Abort at import time when the shell cannot locate the h5repack executable.
# `type` exits with a non-zero status for unknown commands; its output is
# captured so that nothing is printed.
if call("type h5repack", shell=True, stdout=PIPE, stderr=PIPE) != 0:
    log("h5repack (hdf5-tools) not installed. Aborting.")
    exit(1)