def fromString(stringValue):
    """ Build an OpticsGroups object from STAR-formatted text.

    The 'optics' data block found in ``stringValue`` is parsed into a Table,
    which is then wrapped in an OpticsGroups instance.
    """
    opticsTable = Table()
    opticsTable.readStar(io.StringIO(stringValue), tableName='optics')
    return OpticsGroups(opticsTable)
def createOutputStep(self):
    """ Register the refined volume, the output particles and the FSC.

    The volume points to 'relion_class001.mrc' in the extra folder and gets
    the two final half-maps attached. The particles are filled from the last
    iteration, and the FSC curve is read from the 'model_class_1' table of
    'relion_model.star'.
    """
    inputSet = self._getInputParticles()

    # Final map, with the sampling of the input and its two half-maps
    volume = Volume()
    volume.setFileName(self._getExtraPath('relion_class001.mrc'))
    volume.setSamplingRate(inputSet.getSamplingRate())
    halfMaps = [
        self._getFileName("final_half1_volume", ref3d=1),
        self._getFileName("final_half2_volume", ref3d=1),
    ]
    volume.setHalfMaps(halfMaps)

    # Particles with the data of the last iteration
    outputSet = self._createSetOfParticles()
    outputSet.copyInfo(inputSet)
    self._fillDataFromIter(outputSet, self._lastIter())

    self._defineOutputs(outputVolume=volume)
    self._defineSourceRelation(self.inputParticles, volume)
    self._defineOutputs(outputParticles=outputSet)
    self._defineTransformRelation(self.inputParticles, outputSet)

    # Gold-standard FSC curve stored in the model star file
    fsc = FSC(objLabel=self.getRunName())
    modelTable = Table(fileName=self._getExtraPath("relion_model.star"),
                       tableName='model_class_1')
    invResolution = modelTable.getColumnValues('rlnResolution')
    fscValues = modelTable.getColumnValues('rlnGoldStandardFsc')
    fsc.setData(invResolution, fscValues)

    self._defineOutputs(outputFSC=fsc)
    self._defineSourceRelation(volume, fsc)
def createOutputStep(self):
    """ Create the output particles from 'expanded_particles.star'.

    The star file is first rewritten in place without the rlnImageId column
    and then imported into a new set of particles.
    """
    inputSet = self.inputParticles.get()
    outputSet = self._createSetOfParticles()
    outputSet.copyInfo(inputSet)
    starFn = self._getExtraPath('expanded_particles.star')

    # remove repeating rlnImageId column
    if Plugin.IS_GT30():
        blockName = 'particles'
        # Loaded before the file is overwritten below, so it can be
        # written back together with the particles table
        opticsTable = Table(fileName=starFn, tableName='optics')
    else:
        blockName = ''

    particlesTable = Table(fileName=starFn, tableName=blockName)
    particlesTable.removeColumns("rlnImageId")

    with open(starFn, "w") as starFile:
        particlesTable.writeStar(starFile, tableName=blockName)
        if Plugin.IS_GT30():
            opticsTable.writeStar(starFile, tableName='optics')

    starReader = convert.createReader()
    starReader.readSetOfParticles(
        starFn, outputSet,
        alignType=ALIGN_PROJ,
        postprocessImageRow=self._postprocessImageRow)

    self._defineOutputs(outputParticles=outputSet)
    self._defineSourceRelation(inputSet, outputSet)
def _getGoodMicFns(self, numPass): """ Parse output star file and get a list of good mics. """ micNames = [] if os.path.exists(self.getOutputFilename(numPass)): table = Table(fileName=self.getOutputFilename(numPass), tableName='') micNames = table.getColumnValues('rlnMicrographName') return micNames
def _createTableFromDict(self, rowDict):
    """ Build a Table (with no rows) whose columns mirror ``rowDict``.

    Each key becomes a column name and the column type is the Python type
    of the corresponding value.
    """
    columns = []
    for name, value in rowDict.items():
        columns.append(Table.Column(name, type=type(value)))
    return Table(columns=columns)
def _plotFSC(self, a, model_star, label, legend=None):
    """ Build an FSC object from the 'model_class_1' table of a model file.

    The FSC label is ``legend``, or ``label`` when no legend is given.
    The ``a`` argument is not used in this method.
    """
    fscLabel = label if legend is None else legend

    modelTable = Table(fileName=model_star, tableName='model_class_1')
    invResolution = modelTable.getColumnValues('rlnResolution')
    fscValues = modelTable.getColumnValues('rlnGoldStandardFsc')

    fscCurve = FSC(objLabel=fscLabel)
    fscCurve.setData(invResolution, fscValues)
    return fscCurve
def plotMdAngularDistribution(self, title, angularMd, tableName=None,
                              color='blue'):
    """ Plot the angular distribution stored in a star file.

    The rlnAngleRot and rlnAngleTilt columns of the given table are
    converted with radians() and passed to plotAngularDistribution.
    The ``color`` argument is accepted but not used here.
    """
    anglesTable = Table(fileName=angularMd, tableName=tableName)
    rot, tilt = (radians(anglesTable.getColumnValues(label))
                 for label in ('rlnAngleRot', 'rlnAngleTilt'))
    self.plotAngularDistribution(title, rot, tilt)
def _getExtraCtfParams(self):
    """Remove once optics parsing is implemented in parse_ctf_star

    Read the CTF values from the first row of the 'optics' and 'particles'
    tables of the 'input_parts' star file.

    Returns:
        tuple (cs, amp, kv, ps): rlnSphericalAberration,
        rlnAmplitudeContrast and rlnVoltage from the optics row, and the
        phase shift from the particles row (0. when the column is absent).
    """
    starFn = self._getFileName('input_parts')

    opticsRow = Table(fileName=starFn, tableName='optics')[0]
    cs = opticsRow.rlnSphericalAberration
    amp = opticsRow.rlnAmplitudeContrast
    kv = opticsRow.rlnVoltage

    partRow = Table(fileName=starFn, tableName='particles')[0]
    # The particle column is named rlnPhaseShift in RELION star files; the
    # previously used rlnCtfPhaseShift name is kept only as a fallback.
    ps = getattr(partRow, 'rlnPhaseShift',
                 getattr(partRow, 'rlnCtfPhaseShift', 0.))

    return cs, amp, kv, ps
def test_read_blocks(self):
    """ Read an star file with several blocks """
    print("Reading micrograph star file...")
    table = Table()
    starStream = StringIO(one_micrograph_mc)

    # This is a single-row table (different text format key, value)
    print("\tread data_general ..")
    table.readStar(starStream, tableName='general')

    # Expected column -> value pairs, in the order they appear in the file
    expectedGeneral = {
        'rlnImageSizeX': 3710,
        'rlnImageSizeY': 3838,
        'rlnImageSizeZ': 24,
        'rlnMicrographMovieName': 'Movies/20170629_00027_frameImage.tiff',
        'rlnMicrographGainName': 'Movies/gain.mrc',
        'rlnMicrographBinning': 1.000000,
        'rlnMicrographOriginalPixelSize': 0.885000,
        'rlnMicrographDoseRate': 1.277000,
        'rlnMicrographPreExposure': 0.000000,
        'rlnVoltage': 200.000000,
        'rlnMicrographStartFrame': 1,
        'rlnMotionModelVersion': 1,
    }

    self._checkColumns(table, list(expectedGeneral))
    generalRow = table[0]
    for name, expected in expectedGeneral.items():
        self.assertEqual(getattr(generalRow, name), expected,
                         "data_general table check failed!")

    print("\tread data_global_shift ..")
    table.readStar(starStream, tableName='global_shift')
    # Result is not used; the call is kept so it is still exercised
    table.getColumns()
    self.assertEqual(len(table), 24, "Number of rows check failed!")
    self._checkColumns(table, ['rlnMicrographFrameNumber',
                               'rlnMicrographShiftX',
                               'rlnMicrographShiftY'])

    print("\tread data_local_motion_model ..")
    table.readStar(starStream, tableName='local_motion_model')
    self.assertEqual(len(table), 36, "Number of rows check failed!")
    self._checkColumns(table, ['rlnMotionModelCoeffsIdx',
                               'rlnMotionModelCoeff'])
    coeffIds = list(map(int,
                        table.getColumnValues('rlnMotionModelCoeffsIdx')))
    self.assertEqual(coeffIds, list(range(36)),
                     "rlnMotionModelCoeffsIdx check failed")

    starStream.close()
def test_iterRows(self):
    """ Compare a fully loaded Table against row-by-row iteration.

    The same 'particles' table is read with Table, Table.Reader and
    Table.iterRows (unsorted and sorted by a key) and the rows are compared.
    """
    print("Checking iterRows...")
    dataFile = testfile('star', 'refine3d', 'run_it016_data.star')
    table = Table(fileName=dataFile, tableName='particles')

    def iterParticles(**kwargs):
        """ Iterate over the particles table of the data file. """
        return Table.iterRows(dataFile, tableName='particles', **kwargs)

    # Let's open again the same file for iteration
    with open(dataFile) as f:
        tableReader = Table.Reader(f, tableName='particles')

        columnPairs = zip(table.getColumns(), tableReader.getColumns())
        for c1, c2 in columnPairs:
            self.assertEqual(c1, c2,
                             "Column c1 (%s) differs from c2 (%s)"
                             % (c1, c2))

        for loadedRow, readerRow in zip(table, tableReader):
            self.assertEqual(loadedRow, readerRow)

    # Now try directly with iterRows function
    for loadedRow, iterRow in zip(table, iterParticles()):
        self.assertEqual(loadedRow, iterRow)

    defocusSorted = [float(row.rlnDefocusU) for row in table]
    defocusSorted.sort()

    rowsByDefocus = iterParticles(key=lambda r: r.rlnDefocusU)
    for defocus, row in zip(defocusSorted, rowsByDefocus):
        self.assertAlmostEqual(defocus, row.rlnDefocusU)

    # Test sorting by imageName column, also using getColumnValues and sort()
    imageIds = table.getColumnValues('rlnImageName')
    imageIds.sort()

    # Check sorted iteration give the total amount of rows
    sortedRows = list(iterParticles(key='rlnImageName'))
    self.assertEqual(len(imageIds), len(sortedRows))

    for imageId, row in zip(imageIds, iterParticles(key='rlnImageName')):
        self.assertEqual(imageId, row.rlnImageName)

    def getIter():
        """ Test a function to get an iterator. """
        return Table.iterRows(dataFile,
                              tableName='particles', key='rlnImageName')

    iterByIds = getIter()
    for imageId, row in zip(imageIds, iterByIds):
        self.assertEqual(imageId, row.rlnImageName)
def read_metadata():
    """ Load the 'sampling_directions' table N times and report memory.

    All the tables are kept referenced in a list until memory_usage() has
    been called.
    """
    dataFile = testfile('star', 'refine3d', 'run_it016_sampling.star')
    # The list only exists to keep every loaded table alive
    tables = [Table(fileName=dataFile, tableName='sampling_directions')
              for _ in range(N)]
    memory_usage()
def _getInputVolumes(self, postStar):
    """ Return (half1, half2, mask) as stored in a postprocess star file.

    The values are taken from the first row of the 'general' table:
    rlnUnfilteredMapHalf1, rlnUnfilteredMapHalf2 and rlnMaskName.
    """
    generalRow = Table(fileName=postStar, tableName='general')[0]
    labels = ('rlnUnfilteredMapHalf1', 'rlnUnfilteredMapHalf2',
              'rlnMaskName')
    return tuple(getattr(generalRow, label) for label in labels)
def _createClasses(self, partSet):
    """ Load the classes of the model star file and register them as output.

    For every row of the 'model_classes' table, the reference image file is
    copied or linked into the protocol extra folder (once per file) when its
    root can be found. The info is stored in self._classesDict, indexed by
    class id starting at 1.
    """
    self._classesDict = {}  # store classes info, indexed by class id
    localPaths = {}  # reference file in the star -> file used for classes

    self.protocol.info('Loading classes info from: %s'
                       % self._modelStarFile)
    classesTable = Table(fileName=self._modelStarFile,
                         tableName='model_classes')

    for classId, row in enumerate(classesTable, start=1):
        index, refFn = relionToLocation(row.rlnReferenceImage)

        if refFn not in localPaths:
            rootPath = pwutils.findRootFrom(self._modelStarFile, refFn)
            if rootPath is None:
                # Root not found: keep the file name as it is in the star
                localPaths[refFn] = refFn
            else:
                localFn = self.protocol._getExtraPath(
                    os.path.basename(refFn))
                self.copyOrLink(os.path.join(rootPath, refFn), localFn)
                localPaths[refFn] = localFn

        self._classesDict[classId] = (index, localPaths[refFn], row)

    classesSet = self._classesFunc(partSet)
    classesSet.classifyItems(updateClassCallback=self._updateClass)

    self.protocol._defineOutputs(outputClasses=classesSet)
    self.protocol._defineSourceRelation(partSet, classesSet)
def _plotSSNR(self, a, fn, table, label):
    """ Plot log(SSNR) against the inverse resolution.

    Params:
        a: object where the curve is drawn; its plot() and
            xaxis.set_major_formatter() methods are used
            (presumably matplotlib axes -- confirm with callers).
        fn: star file to read.
        table: name of the table inside ``fn``.
        label: label passed to a.plot().

    Only the points with SSNR > 0.9 are plotted. Every such row gives one
    point: the values are paired row by row instead of going through a
    dict keyed by the SSNR value, which dropped rows with repeated SSNR.
    """
    ssnrTable = Table(fileName=fn, tableName=table)
    ssnrValues = [float(v) for v in ssnrTable.getColumnValues('rlnSsnrMap')]
    resValues = [float(v)
                 for v in ssnrTable.getColumnValues('rlnResolution')]

    resolution_inv = []
    logSsnr = []
    for ssnrValue, resValue in zip(ssnrValues, resValues):
        # only cross by 1 is important
        if ssnrValue > 0.9:
            resolution_inv.append(resValue)
            logSsnr.append(log(ssnrValue))

    a.plot(resolution_inv, logSsnr, label=label)
    a.xaxis.set_major_formatter(self._plotFormatter)
def getVolumesFromPostprocess(postStar):
    """ Read the half maps and the mask names from a postprocess.star file.

    Returns the tuple (rlnUnfilteredMapHalf1, rlnUnfilteredMapHalf2,
    rlnMaskName) taken from the first row of the 'general' table.
    """
    generalTable = Table(fileName=postStar, tableName='general')
    firstRow = generalTable[0]
    half1 = firstRow.rlnUnfilteredMapHalf1
    half2 = firstRow.rlnUnfilteredMapHalf2
    mask = firstRow.rlnMaskName
    return (half1, half2, mask)
def _getDataDir(self):
    """ We assume all mrcs stacks are in the same folder.

    The folder is taken from rlnImageName in the first row of the
    'particles' table: the text after the first '@' up to the next one.
    """
    particlesTable = Table(fileName=self._getFileName('input_parts'),
                           tableName='particles')
    imageName = str(particlesTable[0].rlnImageName)
    stackPath = imageName.split('@')[1]
    return os.path.dirname(stackPath)
def _showChanges(self, paramName=None):
    """ Collect the per-iteration change values and show them as a table.

    For every iteration whose optimiser file exists, the CHANGE_LABELS
    values are read from the first row of its 'optimiser_general' table.
    The collected rows are written to the 'all_changes' star file.

    Params:
        paramName: not used in this method.
    Returns:
        list with a single data view of the written star file.
    """
    changeLabels = self.protocol.CHANGE_LABELS
    tableChanges = Table(columns=['rlnIterationNumber'] + changeLabels)

    print("Computing average changes in offset, angles, and class membership")
    for it in self._getAllIters():
        # Computed once per iteration and reused for the check and the read
        optimiserFn = self.protocol._getFileName('optimiser', iter=it)
        if not os.path.exists(optimiserFn):
            continue
        print("Computing data for iteration; %03d" % it)
        row = Table(fileName=optimiserFn, tableName='optimiser_general')[0]
        tableChanges.addRow(it, *[getattr(row, label)
                                  for label in changeLabels])

    fn = self.protocol._getFileName('all_changes')
    with open(fn, 'w') as f:
        tableChanges.writeStar(f)

    return [self.createDataView(fn)]
def _showPMax(self, paramName=None):
    """ Show the average PMax per iteration, as a table and as a plot.

    rlnAveragePmax and rlnLogLikelihood are read from the 'model_general'
    table of every iteration but the first one, and written to the
    'all_avgPmax' star file. ``paramName`` is not used in this method.
    """
    pmaxTable = Table(columns=['rlnIterationNumber',
                               'rlnAveragePmax',
                               'rlnLogLikelihood'])

    for it in self._getAllIters():
        if it != 1:  # skip iter1 with Pmax=1
            # always list all iterations
            modelKey = self.protocol.PREFIXES[0] + 'model'
            modelFn = self.protocol._getFileName(modelKey, iter=it)
            modelRow = Table(fileName=modelFn,
                             tableName='model_general')[0]
            pmaxTable.addRow(int(it),
                             float(modelRow.rlnAveragePmax),
                             float(modelRow.rlnLogLikelihood))

    outputFn = self.protocol._getFileName('all_avgPmax')
    with open(outputFn, 'w') as outputFile:
        pmaxTable.writeStar(outputFile)

    plotter = RelionPlotter()
    plotter.createSubPlot("Avg PMax per Iterations", "Iterations",
                          "Avg PMax")
    plotter.plotMd(pmaxTable, 'rlnIterationNumber', 'rlnAveragePmax')
    plotter.showLegend(['rlnAveragePmax'])

    return [self.createDataView(outputFn), plotter]
def _write(self, f):
    """ Write all the optics groups to ``f`` as the 'optics' table.

    Column names and types are taken from the fields of the first group.
    """
    # Create columns from the first row
    firstFields = self.first()._asdict()
    columns = [Table.Column(name, type(value))
               for name, value in firstFields.items()]
    opticsTable = Table(columns=columns)

    for opticsGroup in self._dict.values():
        opticsTable.addRow(*opticsGroup)

    opticsTable.writeStar(f, tableName='optics')
def _summaryNormal(self): summary = [] if not hasattr(self, 'outputVolume'): summary.append("Output volume not ready yet.") it = self._lastIter() or -1 if it >= 1 and it > self._getContinueIter(): table = Table(fileName=self._getFileName('half1_model', iter=it), tableName='model_general') row = table[0] resol = float(row.rlnCurrentResolution) summary.append("Current resolution: *%0.2f A*" % resol) else: table = Table(fileName=self._getFileName('modelFinal'), tableName='model_general') row = table[0] resol = float(row.rlnCurrentResolution) summary.append("Final resolution: *%0.2f A*" % resol) return summary
def _summaryNormal(self): summary = [] it = self._lastIter() or -1 if it >= 1: table = Table(fileName=self._getFileName('model', iter=it), tableName='model_general') row = table[0] resol = float(row.rlnCurrentResolution) summary.append("Current resolution: *%0.2f*" % resol) return summary
def _loadVolsInfo(self):
    """ Store the rows of the 'model_bodies' table in self._volsInfo.

    The rows are read from the 'finalModel' file and indexed by their
    position in the table, starting at 1.
    """
    self._volsInfo = {}
    bodiesTable = Table(fileName=self._getFileName('finalModel'),
                        tableName='model_bodies')
    for bodyId, row in enumerate(bodiesTable, start=1):
        self._volsInfo[bodyId] = row
def _summary(self): summary = [] postStarFn = self._getExtraPath("postprocess.star") if os.path.exists(postStarFn): table = Table(fileName=postStarFn, tableName='general') row = table[0] summary.append("Final resolution: *%0.2f A*" % float(row.rlnFinalResolution)) return summary
def test_removeColumns(self):
    """ Check the columns of a particles table before and after removal. """
    print("Checking removeColumns...")
    dataFile = testfile('star', 'refine3d', 'run_it016_data.star')
    table = Table(fileName=dataFile, tableName='particles')

    allColumns = [
        'rlnCoordinateX', 'rlnCoordinateY', 'rlnAutopickFigureOfMerit',
        'rlnClassNumber', 'rlnAnglePsi', 'rlnImageName',
        'rlnMicrographName', 'rlnOpticsGroup', 'rlnCtfMaxResolution',
        'rlnCtfFigureOfMerit', 'rlnDefocusU', 'rlnDefocusV',
        'rlnDefocusAngle', 'rlnCtfBfactor', 'rlnCtfScalefactor',
        'rlnPhaseShift', 'rlnGroupNumber', 'rlnAngleRot', 'rlnAngleTilt',
        'rlnOriginXAngst', 'rlnOriginYAngst', 'rlnNormCorrection',
        'rlnLogLikeliContribution', 'rlnMaxValueProbDistribution',
        'rlnNrOfSignificantSamples', 'rlnRandomSubset',
    ]
    removed = [
        'rlnOriginXAngst', 'rlnOriginYAngst', 'rlnNormCorrection',
        'rlnAnglePsi', 'rlnMaxValueProbDistribution',
    ]

    # Check all columns were read properly
    self.assertEqual(allColumns, table.getColumnNames())
    # Check also using hasAllColumns method
    self.assertTrue(table.hasAllColumns(allColumns))

    table.removeColumns(removed)

    remaining = [name for name in allColumns if name not in removed]
    self.assertEqual(remaining, table.getColumnNames())
    # Check also using hasAnyColumn method
    self.assertFalse(table.hasAnyColumn(removed))
def createFinalFilesStep(self):
    """ Merge the results of every run level into the final files.

    For each value returned by _getRLevList(), the first row of the
    'model_classes' table of its last iteration is added to the final
    model metadata, after copying its reference map to a new file. The
    particles of that iteration are added to the final table with
    rlnClassNumber replaced by the run level value.
    """
    # -----metadata to save all final models-------
    finalModel = self._getFileName('finalModel')
    finalModelMd = self._getMetadata()

    # -----metadata to save all final particles-----
    finalData = self._getFileName('finalData')

    # The raw final data is only used here to get the column names
    fn = self._getFileName('rawFinalData')
    print("FN: ", fn)
    tableIn = Table(fileName=fn, tableName='particles')
    cols = [str(c) for c in tableIn.getColumnNames()]
    ouTable = Table(columns=cols, tableName='particles')

    for rLev in self._getRLevList():
        it = self._lastIter(rLev)
        modelFn = self._getFileName('model', iter=it,
                                    lev=self._level, rLev=rLev)
        modelMd = self._getMetadata('model_classes@' + modelFn)

        refLabel = md.RLN_MLMODEL_REF_IMAGE
        imgRow = md.getFirstRow(modelMd)
        # 'fn' is reused: from here on it is the reference map of the row
        fn = imgRow.getValue(refLabel)

        mapId = self._getRunLevId(rLev=rLev)
        newMap = self._getMapById(mapId)
        # The row points to the new map, which is a copy of the original
        imgRow.setValue(refLabel, newMap)
        copyFile(fn, newMap)
        # Keyed by the original map file name
        self._mapsDict[fn] = mapId

        imgRow.addToMd(finalModelMd)

        dataFn = self._getFileName('data', iter=it,
                                   lev=self._level, rLev=rLev)

        pTable = Table()
        for row in pTable.iterRows(dataFn, tableName='particles'):
            # Rows expose _replace (namedtuple-like -- confirm in Table)
            newRow = row._replace(rlnClassNumber=rLev)
            ouTable.addRow(*newRow)

    self.writeStar(finalData, ouTable)
    finalModelMd.write('model_classes@' + finalModel)
def readSetOfParticles(self, starFile, partSet, **kwargs):
    """ Convert a star file into a set of particles.

    Params:
        starFile: the filename of the star file
        partSet: output particles set

    Keyword Arguments (the ones read by this method):
        preprocessImageRow: stored in self._preprocessImageRow
            (default None)
        alignType: alignment type (default ALIGN_NONE)
        postprocessImageRow: stored in self._postprocessImageRow
            (default None)
        readAcquisition: stored in self._setAcq (default True)
        magnification: set in the acquisition of the set (default 10000)
        extraLabels: list of labels, PARTICLE_EXTRA_LABELS is appended
            to it (default [])

    The rows are always read from the 'particles' table and the pixel size
    comes from rlnImagePixelSize of the first optics group (1.0 if absent).
    """
    self._preprocessImageRow = kwargs.get('preprocessImageRow', None)
    self._alignType = kwargs.get('alignType', ALIGN_NONE)
    self._postprocessImageRow = kwargs.get('postprocessImageRow', None)

    self._optics = OpticsGroups.fromStar(starFile)

    self._pixelSize = getattr(self._optics.first(),
                              'rlnImagePixelSize', 1.0)
    # NOTE(review): a pixel size of 0 in the star file would fail here
    self._invPixelSize = 1. / self._pixelSize

    partsReader = Table.Reader(starFile, tableName='particles')

    # The first row decides which optional values are set in all particles
    firstRow = partsReader.getRow()
    self._setClassId = hasattr(firstRow, 'rlnClassNumber')
    self._setCtf = partsReader.hasAllColumns(self.CTF_LABELS[:3])

    particle = Particle()

    if self._setCtf:
        particle.setCTF(CTFModel())

    self._setAcq = kwargs.get("readAcquisition", True)

    acq = Acquisition()
    acq.setMagnification(kwargs.get('magnification', 10000))

    extraLabels = kwargs.get('extraLabels', []) + PARTICLE_EXTRA_LABELS
    self.createExtraLabels(particle, firstRow, extraLabels)

    self._rowToPart(firstRow, particle)
    partSet.setSamplingRate(self._pixelSize)
    partSet.setAcquisition(acq)
    self._optics.toImages(partSet)
    partSet.append(particle)

    # The same Particle instance is reused for the remaining rows: it is
    # passed to _rowToPart and appended again on every iteration
    for row in partsReader:
        self._rowToPart(row, particle)
        partSet.append(particle)

    partSet.setHasCTF(self._setCtf)
    partSet.setAlignment(self._alignType)
def _fillDataFromIter(self, imgSet, iteration):
    """ Fill ``imgSet`` with the input particles updated from an iteration.

    The rows of the 'data' star file of ``iteration`` are iterated using
    rlnImageId as key and passed to copyItems together with the input
    particles; every item is updated through self._updateParticle.
    """
    if self.IS_GT30():
        blockPrefix = 'particles@'
    else:
        blockPrefix = ''

    dataStarFn = self._getFileName('data', iter=iteration)
    imgSet.setAlignmentProj()

    self.reader = convert.createReader(
        alignType=ALIGN_PROJ,
        pixelSize=imgSet.getSamplingRate())

    rowsById = Table.iterRows(blockPrefix + dataStarFn, key='rlnImageId')
    imgSet.copyItems(self._getInputParticles(),
                     doClone=False,
                     updateItemCallback=self._updateParticle,
                     itemDataIterator=rowsById)
def test_read_particles(self):
    """ Read from a particles .star file """
    print("Reading particles star file...")
    table = Table()
    starStream = StringIO(particles_3d_classify)

    table.readStar(starStream)
    columns = table.getColumns()

    self.assertEqual(len(table), 16, "Number of rows check failed!")
    self.assertEqual(len(columns), 25, "Number of columns check failed!")

    # Check that all rlnOpticsGroup is 1 and rlnImageName file is the same
    expectedStack = "Extract/job012/Movies/20170629_00021_frameImage.mrcs"
    for row in table:
        self.assertEqual(row.rlnOpticsGroup, 1,
                         "rlnOpticsGroup check failed!")
        stackName = row.rlnImageName.split("@")[1]
        self.assertEqual(stackName, expectedStack,
                         "rlnImageId check failed!")

    starStream.close()
def test_readSetOfParticlesAfterCtf(self):
    """ Check extra and optics labels after reading and after writing.

    The particles of a CtfRefine star file are read into a set; the extra
    labels of the first particle are compared with the first star row and
    the optics labels are checked. The same checks are repeated after
    writing the set to a new star file.
    """
    if not Plugin.IS_GT30():
        print("Skipping test (required Relion > 3.1)")
        return

    opticsLabels = ['rlnBeamTiltX', 'rlnBeamTiltY']
    extraLabels = ['rlnCtfBfactor', 'rlnCtfScalefactor', 'rlnPhaseShift']

    starFile = self.ds.getFile(
        "CtfRefine/job023/particles_ctf_refine.star")
    inputRow = Table.Reader(starFile, tableName='particles').getRow()

    partsSet = self.__readParticles(starFile)
    firstPart = partsSet.getFirstItem()

    def checkExtraLabels(starRow):
        """ Compare the extra labels of the first particle with a row. """
        for label in extraLabels:
            value = getattr(firstPart, '_%s' % label)
            self.assertIsNotNone(value, "Missing label: %s" % label)
            self.assertAlmostEqual(getattr(starRow, label), value)

    def checkOpticsLabels(opticsGroup):
        """ Check that the optics group has all the expected labels. """
        self.assertTrue(all(hasattr(opticsGroup, label)
                            for label in opticsLabels))

    checkExtraLabels(inputRow)
    checkOpticsLabels(OpticsGroups.fromImages(partsSet).first())

    # Also test writing and preserving extra labels
    outputStar = self.getOutputPath('particles.star')
    print(">>> Writing to particles star: %s" % outputStar)
    writer = convert.createWriter()
    writer.writeSetOfParticles(partsSet, outputStar)

    checkOpticsLabels(OpticsGroups.fromStar(outputStar).first())

    outputRow = Table.Reader(outputStar, tableName='particles').getRow()
    checkExtraLabels(outputRow)
def _findImagesPath(self, label, warnings=True): '''This function validates the input path for the binaries and gets the acquisition settings from the first row''' # read the first table table = Table(fileName=self._starFile) acqRow = row = table[0] if row is None: raise Exception("Cannot import from empty metadata: %s" % self._starFile) if not row.get('rlnOpticsGroup', False): self.version30 = True self.protocol.warning("Import from Relion version < 3.1 ...") else: acqRow = OpticsGroups.fromStar(self._starFile) # read particles table table = Table(fileName=self._starFile, tableName='particles') row = table[0] if not row.get(label, False): raise Exception("Label *%s* is missing in metadata: %s" % (label, self._starFile)) index, fn = relionToLocation(row.get(label)) self._imgPath = pwutils.findRootFrom(self._starFile, fn) if warnings and self._imgPath is None: self.protocol.warning( "Binary data was not found from metadata: %s" % self._starFile) # Check if the MetaData contains either MDL_MICROGRAPH_ID # or MDL_MICROGRAPH, this will be used when imported # particles to keep track of the particle's micrograph self._micIdOrName = (row.get('rlnMicrographName', False) or row.get('rlnMicrographId', False)) # init dictionary. It will be used in the preprocessing self._stackTrans = None self._micTrans = None print("acqRow", acqRow) return row, None, acqRow