def extractCoordinates(self, partsIds=None):
    """Create a set of coordinates from the coordinates of the input particles.

    Each particle position is multiplied by the ratio between the particle
    and the micrograph sampling rates. When ``self.applyShifts`` is true,
    the integer part of the particle shifts is subtracted before scaling.

    :param partsIds: ids of the particles to process. The first id is used
        to compute the suffix of the output set, and the ids are the ones
        iterated when ``self.streamingModeOn`` is true. With ``None`` the
        suffix is empty.
    :return: the set of coordinates that was created and filled.
    """
    particles = self.getInputParticles()
    micrographs = self.getInputMicrographs()
    scale = particles.getSamplingRate() / micrographs.getSamplingRate()
    print("Scaling coordinates by a factor *%0.2f*" % scale)
    alignType = particles.getAlignment()

    if partsIds is not None:
        suffix = self.getSuffix(partsIds[0])
    else:
        suffix = ''
    outputCoords = self._createSetOfCoordinates(micrographs, suffix=suffix)

    # Double key lookup table: every micrograph is reachable both by its
    # object id and by its micrograph name.
    micLookup = {}
    for item in micrographs.iterItems():
        # Store a clone: the iterated item would otherwise end up being the
        # same micrograph for every key of the dictionary.
        micCopy = item.clone()
        micLookup[micCopy.getObjId()] = micCopy
        micLookup[micCopy.getMicName()] = micCopy

    # A single Coordinate instance is reused for every append; its id is
    # overwritten with the particle one through copyObjId.
    newCoord = emobj.Coordinate()

    def appendCoordFromParticle(part):
        partCoord = part.getCoordinate()

        # First attempt: match by micrograph name
        micName = partCoord.getMicName()
        mic = micLookup.get(micName)

        if mic is None:
            # Second attempt: match by micrograph id
            micKey = partCoord.getMicId()
            mic = micLookup.get(micKey)
            if mic is None:
                print("Skipping particle, %s or id %s not found"
                      % (micName, micKey))
                return

        newCoord.copyObjId(part)
        x, y = partCoord.getPosition()

        if self.applyShifts:
            shifts = self.getShifts(part.getTransform(), alignType)
            x = x - int(shifts[0])
            y = y - int(shifts[1])

        newCoord.setPosition(x * scale, y * scale)
        newCoord.setMicrograph(mic)
        outputCoords.append(newCoord)

    if self.streamingModeOn:
        selected = (particles[partId] for partId in partsIds)
    else:
        selected = particles

    for particle in selected:
        appendCoordFromParticle(particle)

    outputCoords.setBoxSize(particles.getXDim() * scale)

    return outputCoords
def testWriteSetOfCoordinatesWithoutFlip(self):
    """Write a small set of coordinates to box files and check the result.

    Two micrographs with one coordinate each are registered, then
    ``convert.writeSetOfCoordinates`` and ``convert.convertMicrographs``
    are invoked. The test checks that one box file and one micrograph file
    exist per micrograph and that the first line of ``mic00001.box`` starts
    with the expected tab-separated values.
    """
    from collections import OrderedDict

    # Define a temporary sqlite file for micrographs
    fn = self.getOutputPath('convert_mics.sqlite')
    mics = emobj.SetOfMicrographs(filename=fn)

    # Create SetOfCoordinates data
    # Define a temporary sqlite file for coordinates
    fn = self.getOutputPath('convert_coordinates.sqlite')
    coordSet = emobj.SetOfCoordinates(filename=fn)
    coordSet.setBoxSize(60)
    coordSet.setMicrographs(mics)

    # Micrograph key -> list of (x, y) positions, kept in insertion order
    data = OrderedDict()
    data['006'] = [(30, 30)]
    data['016'] = [(40, 40)]

    micList = []
    for key, coords in data.items():
        mic = emobj.Micrograph(self.ds.getFile('micrographs/%s.mrc' % key))
        mics.append(mic)
        micList.append(mic)
        print("Adding mic: %s, id: %s" % (key, mic.getObjId()))

        for x, y in coords:
            coord = emobj.Coordinate(x=x, y=y)
            coord.setMicrograph(mic)
            coordSet.append(coord)

    # Get boxDirectory
    boxFolder = self.getOutputPath('boxFolder')
    os.mkdir(boxFolder)

    micFolder = self.getOutputPath('micFolder')
    pwutils.makePath(micFolder)

    # Invoke the write set of coordinates method
    convert.writeSetOfCoordinates(boxFolder, coordSet)
    convert.convertMicrographs(micList, micFolder)

    # Assert output of writesetofcoordinates
    for mic in micList:
        boxFile = os.path.join(boxFolder,
                               convert.getMicIdName(mic, '.box'))
        self.assertTrue(os.path.exists(boxFile),
                        'Missing box file: %s' % boxFile)
        micFile = os.path.join(micFolder,
                               convert.getMicIdName(mic, '.mrc'))
        self.assertTrue(os.path.exists(micFile),
                        'Missing mic file: %s' % micFile)

    # Assert coordinates in box files; the context manager closes the file
    # even if reading fails.
    with open(os.path.join(boxFolder, 'mic00001.box')) as fh:
        box1 = fh.readline().split('\t')

    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(box1[0], '0')
    self.assertEqual(box1[1], '964')
def _convert(coordsIn, yFlipHeight=None):
    """Round-trip coordinates through a temporary ``.cbox`` file.

    The input coordinates are written with ``convert.CoordBoxWriter`` and
    read back with ``convert.CoordBoxReader``, both built with the same
    ``boxSize`` and ``yFlipHeight``. ``boxDir`` and ``boxSize`` are taken
    from the enclosing scope.

    :param coordsIn: iterable of 3-item tuples; only the first two items
        (x, y) are written.
    :param yFlipHeight: value forwarded to both the writer and the reader.
    :return: list with the items yielded by the reader.
    """
    cboxPath = os.path.join(boxDir, 'tmp.cbox')

    # Write input coordinates
    boxWriter = convert.CoordBoxWriter(boxSize, yFlipHeight=yFlipHeight)
    boxWriter.open(cboxPath)
    for entry in coordsIn:
        x, y, _ = entry
        boxWriter.writeCoord(emobj.Coordinate(x=x, y=y))
    boxWriter.close()

    # Read them back from the same file
    boxReader = convert.CoordBoxReader(boxSize, yFlipHeight=yFlipHeight)
    boxReader.open(cboxPath)
    coordsOut = list(boxReader.iterCoords())
    boxReader.close()

    return coordsOut
def readMicrographCoords(mic, coordSet, coordsFile, boxSize,
                         yFlipHeight=None, boxSizeEstimated=False):
    """Read the coordinates of one micrograph and append them to a set.

    :param mic: micrograph assigned to every coordinate that is read.
    :param coordSet: set receiving the coordinates (through ``append``).
    :param coordsFile: path of the coordinates file opened by the reader.
    :param boxSize: forwarded to ``CoordBoxReader``.
    :param yFlipHeight: forwarded to ``CoordBoxReader``.
    :param boxSizeEstimated: forwarded to ``CoordBoxReader``.
    """
    reader = CoordBoxReader(boxSize,
                            yFlipHeight=yFlipHeight,
                            boxSizeEstimated=boxSizeEstimated)
    reader.open(coordsFile)

    # A single Coordinate instance is reused for all the rows
    coord = emobj.Coordinate()

    try:
        for x, y, score in reader.iterCoords():
            # Clean up objId to add as a new coordinate
            coord.setObjId(None)
            coord.setPosition(x, y)
            coord.setMicrograph(mic)
            coord._cryoloScore = emobj.Float(score)
            # Add it to the set
            coordSet.append(coord)
    finally:
        # Release the reader even if reading or appending fails
        reader.close()
def rowToCoordinate(coordRow):
    """ Create a Coordinate from a row of a metadata.

    Returns None when the row does not contain all the labels of
    COOR_DICT. The micrograph name is taken from RLN_MICROGRAPH_NAME when
    that label is present; otherwise the micrograph id (if any) is used
    as the name.
    """
    # Without all the required labels there is nothing to build
    if not coordRow.containsAll(COOR_DICT):
        return None

    coordinate = pwobj.Coordinate()
    rowToObject(coordRow, coordinate, COOR_DICT,
                extraLabels=COOR_EXTRA_LABELS)

    name = None

    if coordRow.hasLabel(md.RLN_MICROGRAPH_ID):
        micId = int(coordRow.get(md.RLN_MICROGRAPH_ID))
        coordinate.setMicId(micId)
        # Fallback name, replaced below when an explicit name exists
        name = micId

    if coordRow.hasLabel(md.RLN_MICROGRAPH_NAME):
        name = coordRow.get(md.RLN_MICROGRAPH_NAME)

    coordinate.setMicName(name)

    return coordinate
def extractCoordinates(self, partsIds=None):
    """Create a set of coordinates from the coordinates of the input particles.

    Each particle position is multiplied by the ratio between the particle
    and the micrograph sampling rates and then rounded to the nearest
    integer. When ``self.applyShifts`` is true the particle shifts are
    subtracted before scaling and the rounding residuals are stored in the
    ``xFrac`` / ``yFrac`` attributes of the coordinate.

    :param partsIds: ids of the particles to process. The first id is used
        to compute the suffix of the output set, and the ids are the ones
        iterated when ``self.streamingModeOn`` is true. With ``None`` the
        suffix is empty.
    :return: the set of coordinates that was created and filled.
    """
    particles = self.getInputParticles()
    micrographs = self.getInputMicrographs()
    scale = particles.getSamplingRate() / micrographs.getSamplingRate()
    print("Scaling coordinates by a factor *%0.2f*" % scale)
    alignType = particles.getAlignment()

    if partsIds is not None:
        suffix = self.getSuffix(partsIds[0])
    else:
        suffix = ''
    outputCoords = self._createSetOfCoordinates(micrographs, suffix=suffix)

    # Double key lookup table: every micrograph is reachable both by its
    # object id and by its micrograph name.
    micLookup = {}
    for item in micrographs.iterItems():
        # Store a clone: the iterated item would otherwise end up being the
        # same micrograph for every key of the dictionary.
        micCopy = item.clone()
        micLookup[micCopy.getObjId()] = micCopy
        micLookup[micCopy.getMicName()] = micCopy

    # A single Coordinate instance is reused for every append; its id is
    # overwritten with the particle one through copyObjId.
    newCoord = emobj.Coordinate()

    def appendCoordFromParticle(part):
        partCoord = part.getCoordinate()

        # First attempt: match by micrograph name
        micName = partCoord.getMicName()
        mic = micLookup.get(micName)

        if mic is None:
            # Second attempt: match by micrograph id
            micKey = partCoord.getMicId()
            mic = micLookup.get(micKey)
            if mic is None:
                print("Skipping particle, %s or id %s not found"
                      % (micName, micKey))
                return

        newCoord.copyObjId(part)
        x, y = partCoord.getPosition()

        if self.applyShifts:
            # Get the shifts, they are returned with the sign reverted
            shifts = self.getShifts(part.getTransform(), alignType)
            # Add the shifts (values are inverted so subtract)
            x = x - shifts[0]
            y = y - shifts[1]

        # Apply the scale
        x = x * scale
        y = y * scale

        # Round coordinates to closer integer 39.9 --> 40 and not 39
        roundedX, roundedY = round(x), round(y)

        # Annotate fractions if shifts applied
        if self.applyShifts:
            newCoord.xFrac = Float(roundedX - x)
            newCoord.yFrac = Float(roundedY - y)

        newCoord.setPosition(roundedX, roundedY)
        newCoord.setMicrograph(mic)
        outputCoords.append(newCoord)

    if self.streamingModeOn:
        selected = (particles[partId] for partId in partsIds)
    else:
        selected = particles

    for particle in selected:
        appendCoordFromParticle(particle)

    outputCoords.setBoxSize(particles.getXDim() * scale)

    return outputCoords
def createCoorSetProtocol(self):
    """Launch a dummy protocol that outputs synthetic micrographs and coordinates.

    MICNUMBER identical tif images are written to the extra folder of the
    protocol. Each image is black except for points placed on concentric
    circles around its centre. Every point is also registered as a
    coordinate of every micrograph.

    :return: the dummy protocol, with the coordinates defined under
        OUTPUT_COORDINATES and the micrographs under 'outputMic'.
    """
    MICNUMBER = 2
    MICSIZE = 1000

    # create dummy protocol
    dummyProt = self.newProtocol(EMProtocol)
    dummyProt.setObjLabel('dummy protocol')
    self.launchProtocol(dummyProt)

    # create the image matrix: points on circles centred in the image
    matrix = np.zeros((MICSIZE, MICSIZE), np.uint8)
    R = np.linspace(0, MICSIZE // 2 - 2, 20)
    # note we have 0 and 2*pi for each radius
    THETA = np.linspace(0, 2 * np.pi, 30)
    radii, thetas = np.meshgrid(R, THETA)
    X = (radii * np.cos(thetas)).astype(int) + MICSIZE // 2
    Y = (radii * np.sin(thetas)).astype(int) + MICSIZE // 2

    # Integer-array indexing sets every (row=y, column=x) pair at once
    matrix[Y, X] = 255

    # write the same image once per micrograph
    for micNumber in range(1, MICNUMBER + 1):
        fn = dummyProt._getExtraPath('mic_%06d.tif' % micNumber)
        Image.fromarray(matrix).save(fn)

    # create set of micrographs; a single Micrograph object is reused and
    # its id is cleaned after each append
    micSet = dummyProt._createSetOfMicrographs()
    mic = emobj.Micrograph()
    for i in range(MICNUMBER):
        micName = "mic_%06d.tif" % (i + 1)
        mic.setFileName(dummyProt._getExtraPath(micName))
        mic.setMicName(micName)
        mic.setSamplingRate(1.)
        micSet.append(mic)
        mic.cleanObjId()
    micSet.setSamplingRate(1.)
    micSet.write()

    # create set of coordinates: every point is added to every micrograph
    coordSet = dummyProt._createSetOfCoordinates(micSet)
    coordSet.setBoxSize(10)
    coord = emobj.Coordinate()

    # ravel() walks the points in the same row-by-row order as nested loops
    for x, y in zip(X.ravel(), Y.ravel()):
        for micId in range(1, MICNUMBER + 1):
            coord.setX(x)
            coord.setY(y)
            coord.setMicId(micId)
            coord.setMicName("mic_%06d.tif" % micId)
            coordSet.append(coord)
            coord.cleanObjId()
    coordSet.write()

    outputArgs = {OUTPUT_COORDINATES: coordSet, 'outputMic': micSet}
    dummyProt._defineOutputs(**outputArgs)
    dummyProt._store()
    return dummyProt
def test_mapper(self):
    """ Check that indexes are created when a SetOfCoordinates is created,
    then time coordinate loops with and without the index.
    """
    PARTNUMBER = 10
    MICNUMBER = 600
    NUMBERCOORDINATES = PARTNUMBER * MICNUMBER
    indexesNames = ['_micId']
    prot = createDummyProtocol("dummy_protocol")

    # Set of micrographs: one reused object, id cleaned after each append
    micSet = emobj.SetOfMicrographs(filename=":memory:")
    micrograph = emobj.Micrograph()
    for n in range(NUMBERCOORDINATES):
        micrograph.setLocation(n, "mic_%06d.mrc" % n)
        micSet.append(micrograph)
        micrograph.cleanObjId()
    micSet.write()

    # Set of coordinates, filled the same way
    coordSet = prot._createSetOfCoordinates(micSet)
    coordinate = emobj.Coordinate()
    for _ in range(NUMBERCOORDINATES):
        coordSet.append(coordinate)
        coordinate.cleanObjId()
    coordSet.write()

    # Check the indexes defined in the coordinates database
    dbPath = prot._getPath("coordinates.sqlite")
    print(os.path.abspath(dbPath))
    foundIndexes = sorted(row[1] for row in getIndex(dbPath))
    for found, expected in zip(foundIndexes, indexesNames):
        self.assertEqual(found, 'index_' + expected)

    # Test speed: based on loop in file protocol_extractparticles.py
    # for 600 mic and 100 part the values for the first
    # second and third case where:
    #     Loop with index: 5 sec
    #     Loop no index: 8:01 min
    #     Loop optimized code: 4 sec
    # for 6000 mic and 200 part the values for the first
    #     Loop with index: 1:47 min
    #     optimized Loop with index: 1:20 min
    #     Loop no index: after several hours I stopped the process
    SPEEDTEST = True
    if not SPEEDTEST:
        return

    def queryPerMicrograph():
        # code from protocol_particles. line 415
        # One filtered query per micrograph
        for mic in micSet:
            micId = mic.getObjId()
            clones = []
            for coord in coordSet.iterItems(where='_micId=%s' % micId):
                clones.append(coord.clone())

    def singleOrderedPass():
        # One pass over the coordinates sorted by micrograph id; a new
        # group is started each time the micrograph id changes
        lastMicId = None
        clones = []
        for coord in coordSet.iterItems(orderBy='_micId',
                                        direction='ASC'):
            micId = coord.getMicId()
            if micId != lastMicId:
                lastMicId = micId
                clones = []
            clones.append(coord.clone())

    timer = pwutils.Timer()

    timer.tic()
    queryPerMicrograph()
    timer.toc("Loop with INDEX took:")

    timer.tic()
    singleOrderedPass()
    timer.toc("Loop with INDEX and proper code, took:")

    # delete INDEX, this will not work
    # if database is not sqlite
    connection = sqlite3.connect(dbPath)
    cursor = connection.cursor()
    for name in indexesNames:
        cursor.execute("DROP INDEX index_%s" % name)
    cursor.close()
    connection.close()

    timer.tic()
    queryPerMicrograph()
    timer.toc("Loop with NO INDEX took:")

    timer.tic()
    singleOrderedPass()
    timer.toc("Loop with NO INDEX but proper code, took:")