def saveMetadata(self, instance, schema, metadata): """Save all the metadata to a Dataset_Files paramamter set. """ parameters = self.getParameters(schema, metadata) exclude_line = dict() exclude_line['-----'] = None exclude_line['Reading global metadata'] = None exclude_line['Reading metadata'] = None exclude_line['Reading core metadata'] = None exclude_line['Populating metadata'] = None exclude_line['Reading tags'] = None exclude_line['Verifying Gatan format'] = None exclude_line['Initializing reader'] = None exclude_line['Checking file format [Gatan Digital Micrograph]'] = None if not parameters: return None try: ps = DatafileParameterSet.objects.get(schema=schema, dataset_file=instance) return ps # if already exists then just return it except DatafileParameterSet.DoesNotExist: ps = DatafileParameterSet(schema=schema, dataset_file=instance) ps.save() for p in parameters: print p.name if p.name in metadata: dfp = DatafileParameter(parameterset=ps, name=p) if p.isNumeric(): if metadata[p.name] != '': dfp.numerical_value = metadata[p.name] dfp.save() else: print p.name if isinstance(metadata[p.name], list): for val in reversed(metadata[p.name]): strip_val = val.strip() if strip_val: if not strip_val in exclude_line: dfp = DatafileParameter(parameterset=ps, name=p) dfp.string_value = strip_val dfp.save() else: dfp.string_value = metadata[p.name] dfp.save() return ps
def saveMetadata(self, instance, schema, metadata): """Save all the metadata to a Dataset_Files paramamter set. """ parameters = self.getParameters(schema, metadata) # Some/all? of these excludes below are specific to DM3 format: exclude_line = dict() if not parameters: print "Bailing out of saveMetadata because of 'not parameters'." return None try: ps = DatafileParameterSet.objects.get(schema=schema, datafile=instance) print "Parameter set already exists for %s, so we'll just " \ "return it." % instance.filename return ps # if already exists then just return it except DatafileParameterSet.DoesNotExist: ps = DatafileParameterSet(schema=schema, datafile=instance) ps.save() for p in parameters: print p.name if p.name in metadata: dfp = DatafileParameter(parameterset=ps, name=p) if p.isNumeric(): if metadata[p.name] != '': dfp.numerical_value = metadata[p.name] dfp.save() else: print p.name if isinstance(metadata[p.name], list): for val in reversed(metadata[p.name]): strip_val = val.strip() if strip_val: if strip_val not in exclude_line: dfp = DatafileParameter(parameterset=ps, name=p) dfp.string_value = strip_val dfp.save() else: dfp.string_value = metadata[p.name] dfp.save() return ps
def saveExifMetadata(self, instance, schema, metadata):
    """Save all the EXIF metadata to a Dataset_Files parameter set.

    Returns the (possibly pre-existing) DatafileParameterSet, or None
    when no matching schema parameters were found.
    """
    # NOTE(review): getParamaters [sic] appears to be the helper's
    # actual spelling in this class -- confirm before renaming.
    parameters = self.getParamaters(schema, metadata)
    if not parameters:
        return None
    try:
        ps = DatafileParameterSet.objects.get(schema=schema,
                                              dataset_file=instance)
        return ps  # if already exists then just return it
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()
    for p in parameters:
        if p.name in metadata:
            dfp = DatafileParameter(parameterset=ps, name=p)
            if p.isNumeric():
                # Guard against empty strings, consistent with the
                # other save*Metadata variants in this codebase: ''
                # cannot be stored as a numerical value.
                if metadata[p.name] != '':
                    dfp.numerical_value = metadata[p.name]
                    dfp.save()
            else:
                dfp.string_value = metadata[p.name]
                dfp.save()
    return ps
def saveSpectraMetadata(self, instance, schema, metadata):
    """Save all the spectra metadata to a Dataset_Files parameter set.

    Returns the (possibly pre-existing) DatafileParameterSet, or None
    when no matching schema parameters were found.
    """
    parameters = self.getParamaters(schema, metadata)
    if not parameters:
        return None
    (ps, created) = DatafileParameterSet.objects.get_or_create(
        schema=schema, dataset_file=instance)
    if not created:
        # if parameter set already exists then just return it
        return ps
    # get_or_create() has already persisted the new parameter set, so
    # the redundant ps.save() that used to follow here was removed.
    # save datafile parameters; metadata values are single-element
    # lists, hence the [0] indexing below.
    for p in parameters:
        if p.name in metadata:
            dfp = DatafileParameter(parameterset=ps, name=p)
            if p.isNumeric():
                dfp.numerical_value = metadata[p.name][0]
            else:
                dfp.string_value = metadata[p.name][0]
            dfp.save()
    return ps
def test_000_update_df_status_offline(self, mock_stat):
    """update_df_status should check the online status of preferred
    DFOs for all previously online datafiles and update the online
    Parameter to 'False' for any offline files."""
    df1 = DataFile(dataset=self.dataset, filename="test_df.jpg")
    df1.save()
    dfo1 = DataFileObject(datafile=df1,
                          storage_box=self.sbox1,
                          uri="stream/test.jpg",
                          verified=True)
    dfo1.save()
    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    ps = DatafileParameterSet(schema=schema, datafile=df1)
    ps.save()
    param_name = ParameterName.objects.get(schema=schema, name="online")
    param = DatafileParameter(parameterset=ps, name=param_name)
    # Store the string form explicitly: production code writes
    # str(df_online(...)), i.e. "True"/"False", not a bool.
    param.string_value = "True"
    param.save()
    # st_blocks == 0 simulates an offline (stubbed) file in HSM.
    mock_stat.return_value = Stats(st_size=10000, st_blocks=0,
                                   st_mtime=datetime.now())
    update_df_status()
    params = DatafileParameter.objects.filter(
        parameterset__schema=schema, parameterset__datafile=df1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(params.count(), 1)
    self.assertEqual(params[0].string_value, "False")
def saveFlexstationMetadata(self, instance, schema, metadata):
    """Saves or overwrites the datafile's metadata to a Dataset_Files
    parameter set in the database.
    """
    logger.info('Saving Metadata')
    parameters = self.getParameters(schema, metadata)
    if not parameters:
        return None
    try:
        # Reuse an existing parameter set rather than duplicating it.
        return DatafileParameterSet.objects.get(schema=schema,
                                                dataset_file=instance)
    except DatafileParameterSet.DoesNotExist:
        ps = DatafileParameterSet(schema=schema, dataset_file=instance)
        ps.save()
    for param in parameters:
        if param.name not in metadata:
            continue
        raw_value = metadata[param.name]
        dfp = DatafileParameter(parameterset=ps, name=param)
        if param.isNumeric():
            # Empty strings cannot be stored as numerical values.
            if raw_value != '':
                dfp.numerical_value = raw_value
                dfp.save()
        else:
            # Flexstation text output is Windows-1252 encoded.
            dfp.string_value = raw_value.decode('cp1252')
            dfp.save()
    return ps
def test_003_update_df_status_skip_offline(self, mock_stat,
                                           mock_df_online):
    """update_df_status should skip any files that have previously
    been marked as offline."""
    df2 = DataFile(dataset=self.dataset, filename="test_df2.jpg")
    df2.save()
    dfo2 = DataFileObject(datafile=df2,
                          storage_box=self.sbox1,
                          uri="stream/test_df2.jpg",
                          verified=True)
    dfo2.save()
    # df2.verify()
    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    ps2 = DatafileParameterSet(schema=schema, datafile=df2)
    ps2.save()
    param_name = ParameterName.objects.get(schema=schema, name="online")
    param2 = DatafileParameter(parameterset=ps2, name=param_name)
    # Store the string form explicitly: production code writes
    # str(df_online(...)), i.e. "True"/"False", not a bool.
    param2.string_value = "False"
    param2.save()
    mock_stat.return_value = Stats(st_size=10000, st_blocks=100,
                                   st_mtime=datetime.now())
    update_df_status()
    # assert that the df_online method wasn't called; consistent with
    # the mock-based assertion style used by the sibling tests.
    mock_df_online.assert_not_called()
def saveDicomMetadata(self, instance, schema, metadata): """Save all the metadata to a Dataset_Files paramamter set. """ # FIXME reenable this... #parameters = self.getParameters(schema, metadata) #if not parameters: # logger.debug('dicompng saveDicomMetadata: parameters == NULL :-(') # return None logger = logging.getLogger(__name__) logger.setLevel(10) logger.debug('dicompng saveDicomMetadata...') try: ps = DatafileParameterSet.objects.get(schema=schema, dataset_file=instance) return ps # if already exists then just return it except DatafileParameterSet.DoesNotExist: ps = DatafileParameterSet(schema=schema, dataset_file=instance) ps.save() logger.debug('dicompng UP TO HERE, WHAT NEXT?') try: logger.debug('dicompng UP TO HERE2') dfp = DatafileParameter(parameterset=ps, name=ParameterName.objects.get(name='dump')) logger.debug('dicompng UP TO HERE3') dfp.string_value = metadata logger.debug('dicompng UP TO HERE4: ' + metadata) dfp.save() logger.debug('dicompng UP TO HERE5') except Exception, e: logger.debug('ZZZ' + str(e)) return None
def saveMetadata(self, instance, schema, metadata): """Save all the metadata to a Dataset_Files parameter set. """ logger.error('Olympus-saveMetadata()') parameters = self.getParameters(schema, metadata) if not parameters: return None try: ps = DatafileParameterSet.objects.get(schema=schema, dataset_file=instance) return ps # if already exists then just return it except DatafileParameterSet.DoesNotExist: ps = DatafileParameterSet(schema=schema, dataset_file=instance) ps.save() for p in parameters: print p.name if p.name in metadata: dfp = DatafileParameter(parameterset=ps, name=p) if p.isNumeric(): if metadata[p.name] != '': dfp.numerical_value = metadata[p.name] dfp.save() else: print p.name if isinstance(metadata[p.name], list): for val in reversed(metadata[p.name]): strip_val = val.strip() if strip_val: if not strip_val in exclude_line: dfp = DatafileParameter(parameterset=ps, name=p) dfp.string_value = strip_val dfp.save() else: dfp.string_value = metadata[p.name] dfp.save() return ps
def savep(paramk, paramv):
    """Save a single datafile parameter named *paramk* with value
    *paramv*.

    Relies on `schema` and `param_set` from the enclosing scope.
    Raises ParameterName.DoesNotExist if *paramk* is not declared in
    the schema.  An empty-string value is silently skipped (the
    DatafileParameter object is constructed but never saved).
    """
    param_name = ParameterName.objects.get(schema__id=schema.id,
                                           name=paramk)
    dfp = DatafileParameter(parameterset=param_set, name=param_name)
    if paramv != "":
        # Numeric parameters go into numerical_value, everything else
        # is stored as a string.
        if param_name.isNumeric():
            dfp.numerical_value = paramv
        else:
            dfp.string_value = paramv
        dfp.save()
def create_df_status(datafile, schema_name, min_file_size):
    """Post-save celery task that checks online status of new file and
    creates HSM metadata to track online status.

    Parameters
    ----------
    datafile: DataFile
        datafile to check and create online/offline status metadata for
    schema_name: Schema
        name of Schema which describes ParameterNames
    min_file_size : int
        minimum size of files to check HSM status of. This param is
        simply passed on to df_online.

    Returns
    -------
    None
    """
    # Guard clause: online status is only meaningful for verified files.
    if not datafile.verified:
        # BUG FIX: the previous message contained stray embedded quote
        # characters from a broken triple-quoted string
        # ('...datafile %s " "is not verified').
        LOGGER.warning(
            "Cannot determine online/offline status: datafile %s "
            "is not verified", datafile.id)
        return
    with DatafileLock(datafile, "datafile-%s" % datafile.id) as lock:
        if lock:
            schema = Schema.objects.get(namespace=schema_name)
            if DatafileParameterSet.objects.filter(
                    schema=schema, datafile=datafile).exists():
                # Normalized log string (the triple-quoted original
                # embedded indentation whitespace in the message).
                LOGGER.debug(
                    "HSM DatafileParameterSet already exists for: %s",
                    datafile.id)
                return
            ps = DatafileParameterSet(schema=schema, datafile=datafile)
            ps.save()
            param_name = ParameterName.objects.get(
                schema=schema, name="online")
            dfp = DatafileParameter(parameterset=ps, name=param_name)
            # Stored as the string "True"/"False".
            dfp.string_value = str(df_online(datafile, min_file_size))
            dfp.save()
def test_002_update_df_status_skip_unverified(self, mock_stat,
                                              df_online):
    """update_df_status should skip files that are unverified"""
    # Create a datafile whose only DFO is deliberately NOT verified.
    df2 = DataFile(dataset=self.dataset, filename="test_df2.jpg")
    df2.save()
    dfo2 = DataFileObject(datafile=df2,
                          storage_box=self.sbox1,
                          uri="stream/test_df2.jpg")
    dfo2.save()
    # Attach an HSM parameter set marking the file as currently online.
    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    ps2 = DatafileParameterSet(schema=schema, datafile=df2)
    ps2.save()
    online_param = ParameterName.objects.get(schema=schema,
                                             name="online")
    param2 = DatafileParameter(parameterset=ps2, name=online_param)
    param2.string_value = True
    param2.save()
    # Non-zero st_blocks would mean "online" if the file were checked.
    mock_stat.return_value = Stats(st_size=10000,
                                   st_blocks=100,
                                   st_mtime=datetime.now())
    update_df_status()
    # The unverified file must never have been status-checked.
    df_online.assert_not_called()
def run_fcsplot(fcsplot_path, inputfilename, df_id, schema_id):
    """
    Run fcsplot on a FCS file, generating a PNG preview image and
    recording its relative path in the 'previewImage' datafile
    parameter.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # fcsplot operates on each datafile at a time.
    lock_id = 'fcs-filter-fcsplot-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock():
        return cache.add(lock_id, 'true', LOCK_EXPIRE)

    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock():
        cache.delete(lock_id)

    if not acquire_lock():
        return
    try:
        schema = Schema.objects.get(id=schema_id)
        datafile = DataFile.objects.get(id=df_id)
        ps = DatafileParameterSet.objects.filter(
            schema=schema, datafile=datafile).first()
        if ps:
            prev_param = ParameterName.objects.get(
                schema__id=schema_id, name='previewImage')
            if DatafileParameter.objects.filter(
                    parameterset=ps, name=prev_param).exists():
                logger.info("FCS preview already exists for df_id %d"
                            % df_id)
                return
        outputextension = "png"
        # NOTE(review): dfo may be None if no verified DFO exists --
        # dfo.uri would then raise AttributeError (caught below).
        dfo = DataFileObject.objects.filter(datafile__id=df_id,
                                            verified=True).first()
        preview_image_rel_file_path = os.path.join(
            os.path.dirname(urlparse.urlparse(dfo.uri).path),
            str(df_id),
            '%s.%s' % (os.path.basename(inputfilename),
                       outputextension))
        preview_image_file_path = os.path.join(
            settings.METADATA_STORE_PATH, preview_image_rel_file_path)
        if not os.path.exists(os.path.dirname(preview_image_file_path)):
            os.makedirs(os.path.dirname(preview_image_file_path))
        cmdline = "'%s' '%s' '%s' '%s'" % \
            (sys.executable, fcsplot_path, inputfilename,
             preview_image_file_path)
        logger.info(cmdline)
        p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT, shell=True)
        stdout, _ = p.communicate()
        if p.returncode != 0:
            logger.error(stdout)
            return
        try:
            ps = DatafileParameterSet.objects.get(
                schema__id=schema_id, datafile__id=df_id)
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema, datafile=datafile)
            ps.save()
        param_name = ParameterName.objects.get(
            schema__id=schema_id, name='previewImage')
        dfp = DatafileParameter(parameterset=ps, name=param_name)
        dfp.string_value = preview_image_rel_file_path
        dfp.save()
    except Exception:
        # BUG FIX: was a bare `except:` which also swallowed
        # SystemExit/KeyboardInterrupt; narrowed to Exception while
        # keeping the traceback logging.
        logger.error(traceback.format_exc())
    finally:
        release_lock()
def run_showinf(showinf_path, inputfilename, df_id, schema_id):
    """
    Run showinf on FCS file to extract metadata ('file', 'date' and
    the parameters-and-stains table), storing each as a datafile
    parameter.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # showinf operates on each datafile at a time.
    lock_id = 'fcs-filter-showinf-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock():
        return cache.add(lock_id, 'true', LOCK_EXPIRE)

    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock():
        cache.delete(lock_id)

    if not acquire_lock():
        return
    try:
        schema = Schema.objects.get(id=schema_id)
        datafile = DataFile.objects.get(id=df_id)
        ps = DatafileParameterSet.objects.filter(
            schema=schema, datafile=datafile).first()
        if ps:
            file_param = ParameterName.objects.get(
                schema__id=schema_id, name='file')
            if DatafileParameter.objects.filter(
                    parameterset=ps, name=file_param).exists():
                logger.info("FCS metadata already exists for df_id %d"
                            % df_id)
                return
        cmdline = "'%s' '%s' '%s'" % \
            (sys.executable, showinf_path, inputfilename)
        logger.info(cmdline)
        p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT, shell=True)
        stdout, _ = p.communicate()
        if p.returncode != 0:
            logger.error(stdout)
            return
        image_info_list = stdout.split('\n')
        metadata = {
            'file': "",
            'date': "",
            'parametersAndStainsTable': ""}
        # Accumulate lines between <ParametersAndStains> markers.
        readingParametersAndStainsTable = False
        for line in image_info_list:
            m = re.match("File: (.*)", line)
            if m:
                metadata['file'] = m.group(1)
            m = re.match("Date: (.*)", line)
            if m:
                metadata['date'] = m.group(1)
            if line.strip() == "<ParametersAndStains>":
                readingParametersAndStainsTable = True
            elif line.strip() == "</ParametersAndStains>":
                readingParametersAndStainsTable = False
            elif readingParametersAndStainsTable:
                metadata['parametersAndStainsTable'] += line
        try:
            ps = DatafileParameterSet.objects.get(
                schema__id=schema_id, datafile__id=df_id)
        except DatafileParameterSet.DoesNotExist:
            ps = DatafileParameterSet(schema=schema, datafile=datafile)
            ps.save()
        param_name_strings = ['file', 'date',
                              'parametersAndStainsTable']
        for param_name_str in param_name_strings:
            try:
                param_name = ParameterName.objects.get(
                    schema__id=schema_id, name=param_name_str)
            except ParameterName.DoesNotExist:
                logger.error("Didn't find parameter %s in schema id %s"
                             % (param_name_str, schema_id))
                # BUG FIX: previously fell through after logging and
                # reused `param_name` from an earlier iteration (or
                # raised NameError on the first), saving the value
                # under the wrong parameter. Skip this one instead.
                continue
            dfp = DatafileParameter(parameterset=ps, name=param_name)
            dfp.string_value = metadata[param_name_str]
            dfp.save()
    except Exception:
        # BUG FIX: was a bare `except:` which also swallowed
        # SystemExit/KeyboardInterrupt; narrowed to Exception while
        # keeping the traceback logging.
        logger.error(traceback.format_exc())
    finally:
        release_lock()
def run_showinf(showinf_path, inputfilename, df_id, schema_id):
    """
    Run Bioformats showinf to extract metadata, storing each useful
    output line as an 'image_information' datafile parameter.
    """
    cache = caches['celery-locks']

    # Per-datafile lock so only one showinf runs for a datafile at a
    # time.
    lock_id = 'bioformats-filter-showinf-lock-%d' % df_id

    # cache.add is atomic: it fails when the key already exists.
    def acquire_lock():
        return cache.add(lock_id, 'true', LOCK_EXPIRE)

    # cache.delete() can be slow, but it pairs with add() to give us
    # atomic locking.
    def release_lock():
        cache.delete(lock_id)

    if not acquire_lock():
        return
    try:
        schema = Schema.objects.get(id=schema_id)
        datafile = DataFile.objects.get(id=df_id)
        param_set = DatafileParameterSet.objects.filter(
            schema=schema, datafile=datafile).first()
        if param_set:
            info_param = ParameterName.objects.get(
                schema__id=schema_id, name='image_information')
            already_done = DatafileParameter.objects.filter(
                parameterset=param_set, name=info_param).exists()
            if already_done:
                logger.info("Metadata already exists for df_id %d"
                            % df_id)
                return
        cmdline = "'%s' '%s' -nopix" % (showinf_path, inputfilename)
        logger.info(cmdline)
        proc = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT, shell=True)
        stdout, _ = proc.communicate()
        if proc.returncode != 0:
            logger.error(stdout)
            return
        # Drop the first 11 preamble lines of showinf output.
        image_info_list = stdout.split('\n')[11:]
        # Some/all? of these excludes below are specific to DM3 format:
        excluded_lines = frozenset([
            '-----',
            'Reading global metadata',
            'Reading metadata',
            'Reading core metadata',
            'Populating metadata',
            'Reading tags',
            'Verifying Gatan format',
            'Initializing reader',
            'Checking file format [Gatan Digital Micrograph]',
        ])
        try:
            param_set = DatafileParameterSet.objects.get(
                schema__id=schema_id, datafile__id=df_id)
        except DatafileParameterSet.DoesNotExist:
            param_set = DatafileParameterSet(schema=schema,
                                             datafile=datafile)
            param_set.save()
        param_name = ParameterName.objects.get(
            schema__id=schema_id, name='image_information')
        # Output lines were accumulated in reverse; restore ordering
        # and skip blanks and reader noise.
        for raw_line in reversed(image_info_list):
            text = raw_line.strip()
            if text and text not in excluded_lines:
                dfp = DatafileParameter(parameterset=param_set,
                                        name=param_name)
                dfp.string_value = text
                dfp.save()
    finally:
        release_lock()
def run_bfconvert(bfconvert_path, inputfilename, df_id, schema_id):
    """
    Run Bioformats bfconvert on an image file to generate a PNG
    preview (first series/timepoint/channel/z-slice only), pass it
    through ImageMagick contrast-stretch, and record its relative
    path in the 'previewImage' datafile parameter.
    """
    cache = caches['celery-locks']

    # Locking functions to ensure only one instance of
    # bfconvert operates on each datafile at a time.
    lock_id = 'bioformats-filter-bfconvert-lock-%d' % df_id

    # cache.add fails if the key already exists
    def acquire_lock():
        return cache.add(lock_id, 'true', LOCK_EXPIRE)

    # cache.delete() can be slow, but we have to use it
    # to take advantage of using add() for atomic locking
    def release_lock():
        cache.delete(lock_id)

    if acquire_lock():
        # NOTE(review): try/finally with no except -- any exception
        # propagates to the caller after the lock is released.
        try:
            schema = Schema.objects.get(id=schema_id)
            datafile = DataFile.objects.get(id=df_id)
            ps = DatafileParameterSet.objects.filter(
                schema=schema, datafile=datafile).first()
            if ps:
                # Idempotence: skip work if a preview already exists.
                prev_param = ParameterName.objects.get(
                    schema__id=schema_id, name='previewImage')
                if DatafileParameter.objects.filter(
                        parameterset=ps, name=prev_param).exists():
                    logger.info(
                        "Preview image already exists for df_id %d"
                        % df_id)
                    return
            outputextension = "png"
            # NOTE(review): dfo may be None when no verified DFO
            # exists -- dfo.uri would then raise AttributeError.
            dfo = DataFileObject.objects.filter(
                datafile__id=df_id, verified=True).first()
            preview_image_rel_file_path = os.path.join(
                os.path.dirname(urlparse.urlparse(dfo.uri).path),
                str(df_id),
                '%s.%s' % (os.path.basename(inputfilename),
                           outputextension))
            preview_image_file_path = os.path.join(
                settings.METADATA_STORE_PATH,
                preview_image_rel_file_path)
            if not os.path.exists(
                    os.path.dirname(preview_image_file_path)):
                os.makedirs(os.path.dirname(preview_image_file_path))
            # Extract only the first image from the stack:
            cmdline = "'%s' -series 0 -timepoint 0 -channel 0 -z 0 " \
                "'%s' '%s' -overwrite" %\
                (bfconvert_path, inputfilename,
                 preview_image_file_path)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT, shell=True)
            stdout, _ = p.communicate()
            if p.returncode != 0:
                logger.error(stdout)
                return
            # Keep the raw bfconvert output aside so ImageMagick can
            # write the stretched result back to the final path.
            os.rename(preview_image_file_path,
                      preview_image_file_path + '.bioformats')
            # Run ImageMagick convert with contrast-stretch on an
            # image file.
            # We could probably do this with the Wand Python module
            # instead.
            cmdline = "convert '%s.bioformats' -contrast-stretch 0 '%s'" %\
                (preview_image_file_path, preview_image_file_path)
            logger.info(cmdline)
            p = subprocess.Popen(cmdline, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT, shell=True)
            stdout, _ = p.communicate()
            # The intermediate file is removed whether or not convert
            # succeeded.
            os.unlink(preview_image_file_path + '.bioformats')
            if p.returncode != 0:
                logger.error(stdout)
                return
            try:
                ps = DatafileParameterSet.objects.get(
                    schema__id=schema_id, datafile__id=df_id)
            except DatafileParameterSet.DoesNotExist:
                ps = DatafileParameterSet(schema=schema,
                                          datafile=datafile)
                ps.save()
            param_name = ParameterName.objects.get(
                schema__id=schema_id, name='previewImage')
            dfp = DatafileParameter(parameterset=ps, name=param_name)
            # Store the path relative to METADATA_STORE_PATH.
            dfp.string_value = preview_image_rel_file_path
            dfp.save()
        finally:
            release_lock()