def _showChanges(self, paramName=None):
    """ Collect the per-iteration change values into a single star file.

    For every iteration whose 'optimiser' file exists, the values of the
    protocol's CHANGE_LABELS are read from the first row of its
    'optimiser_general' table and added, preceded by the iteration number,
    to a summary table that is written to the 'all_changes' file.

    Params:
        paramName: not used by this method.
    Returns:
        A list with a single data view created over the written file.
    """
    changeLabels = self.protocol.CHANGE_LABELS
    tableChanges = Table(columns=['rlnIterationNumber'] + changeLabels)

    print("Computing average changes in offset, angles, and class membership")
    for it in self._getAllIters():
        fn = self.protocol._getFileName('optimiser', iter=it)
        # Iterations without an optimiser file are skipped
        if not os.path.exists(fn):
            continue
        print("Computing data for iteration; %03d" % it)
        row = Table(fileName=fn, tableName='optimiser_general')[0]
        cols = [getattr(row, label) for label in changeLabels]
        tableChanges.addRow(it, *cols)

    fn = self.protocol._getFileName('all_changes')
    with open(fn, 'w') as f:
        tableChanges.writeStar(f)
    return [self.createDataView(fn)]
def _showPMax(self, paramName=None):
    """ Tabulate and plot the average Pmax of every iteration.

    The 'model_general' table of each iteration (except the first one) is
    read and its rlnAveragePmax and rlnLogLikelihood values are stored in a
    summary table written to the 'all_avgPmax' file.

    Params:
        paramName: not used by this method.
    Returns:
        A list with the data view of the written file and the plotter.
    """
    pmaxTable = Table(columns=['rlnIterationNumber',
                               'rlnAveragePmax',
                               'rlnLogLikelihood'])

    for iteration in self._getAllIters():
        # skip iter1 with Pmax=1
        if iteration != 1:
            # always list all iterations
            modelKey = self.protocol.PREFIXES[0] + 'model'
            modelFn = self.protocol._getFileName(modelKey, iter=iteration)
            general = Table(fileName=modelFn, tableName='model_general')[0]
            pmaxTable.addRow(int(iteration),
                             float(general.rlnAveragePmax),
                             float(general.rlnLogLikelihood))

    outputFn = self.protocol._getFileName('all_avgPmax')
    with open(outputFn, 'w') as outFile:
        pmaxTable.writeStar(outFile)

    plotter = RelionPlotter()
    plotter.createSubPlot("Avg PMax per Iterations", "Iterations", "Avg PMax")
    plotter.plotMd(pmaxTable, 'rlnIterationNumber', 'rlnAveragePmax')
    plotter.showLegend(['rlnAveragePmax'])

    return [self.createDataView(outputFn), plotter]
def _write(self, f):
    """ Write every stored optics group to f as the 'optics' star table.

    Params:
        f: open file object passed to Table.writeStar.
    """
    # Column names and types are taken from the values of the first row
    firstValues = self.first()._asdict()
    columns = [Table.Column(name, type(value))
               for name, value in firstValues.items()]

    opticsTable = Table(columns=columns)
    for group in self._dict.values():
        opticsTable.addRow(*group)

    opticsTable.writeStar(f, tableName='optics')
def convertInputStep(self, newMics, numPass):
    """ Create a star file as expected by cryoassess.

    Params:
        newMics: iterable of micrographs; the absolute path of each file
            name becomes one row of the table.
        numPass: value passed to getInputFilename and appendTotalInputStar.
    """
    namesTable = Table(columns=['rlnMicrographName'])
    for micrograph in newMics:
        absPath = os.path.abspath(micrograph.getFileName())
        namesTable.addRow(absPath)

    with open(self.getInputFilename(numPass), 'w') as starFile:
        starFile.write("# Star file generated with Scipion\n")
        # The table is written with an empty table name
        namesTable.writeStar(starFile, tableName='')

    self.appendTotalInputStar(numPass)
def writeCoordsConfig(configFn, boxSize, state):
    """ Write the config.xmd file needed for Xmipp picker.
    Params:
        configFn: The filename where to store the configuration.
        boxSize: the box size in pixels for extraction.
        state: picker state
    """
    print("writeCoordsConfig: state=", state)

    # Single-row 'properties' table holding the picker configuration
    configTable = Table(columns=['particleSize', 'pickingState'])
    particleSize = int(boxSize)
    configTable.addRow(particleSize, state)
    configTable.write(configFn, tableName='properties')
def test_write_singleRow(self):
    """ Write tables with singleRow=True, both to stdout and to a file. """
    outputFn = '/tmp/test-single-row.star'
    print("Writing a single row to %s..." % outputFn)

    # Table read from the in-memory star text and echoed to stdout
    shiftsTable = Table()
    shiftsTable.readStar(StringIO(one_micrograph_mc),
                         tableName='global_shift')
    shiftsTable.writeStar(sys.stdout, tableName='global_shift',
                          singleRow=True)

    # Table built from scratch and written to the output file
    columns = ['rlnImageSizeX', 'rlnImageSizeY', 'rlnMicrographMovieName']
    movieTable = Table(columns=columns)
    movieTable.addRow(
        3710, 3838,
        'Movies/14sep05c_00024sq_00003hl_00002es.frames.out.mrc')

    with open(outputFn, 'w') as outFile:
        movieTable.writeStar(outFile, singleRow=True)
def addColumns(self, **kwargs):
    """ Add new columns with default values (type inferred from it).

    Params:
        kwargs: new column names mapped to the value that is set for that
            column in every stored row.
    """
    # Existing columns, typed after the values of the first row
    existing = self.first()._asdict()
    columns = [Table.Column(name, type(value))
               for name, value in existing.items()]
    # New columns, typed after the given default values
    columns.extend(Table.Column(name, type(default))
                   for name, default in kwargs.items())

    extended = Table(columns=columns)
    for group in self._dict.values():
        rowValues = group._asdict()
        rowValues.update(kwargs)
        extended.addRow(**rowValues)

    self.__fromTable(extended)
def createFinalFilesStep(self):
    """ Gather the last-iteration results of every run level.

    For each run level the reference image of the first 'model_classes' row
    is copied to the map file of that run level and the row is added to the
    final model metadata. The particles of that run level are added to the
    final particles table with rlnClassNumber set to the run level.
    """
    # -----metadata to save all final models-------
    finalModel = self._getFileName('finalModel')
    finalModelMd = self._getMetadata()

    # -----metadata to save all final particles-----
    finalData = self._getFileName('finalData')
    rawDataFn = self._getFileName('rawFinalData')
    print("FN: ", rawDataFn)
    # Only the column names of the raw final data are used here
    rawTable = Table(fileName=rawDataFn, tableName='particles')
    columnNames = [str(name) for name in rawTable.getColumnNames()]
    finalParts = Table(columns=columnNames, tableName='particles')

    for rLev in self._getRLevList():
        lastIt = self._lastIter(rLev)

        modelFn = self._getFileName('model', iter=lastIt, lev=self._level,
                                    rLev=rLev)
        modelMd = self._getMetadata('model_classes@' + modelFn)
        refLabel = md.RLN_MLMODEL_REF_IMAGE
        imgRow = md.getFirstRow(modelMd)
        refImage = imgRow.getValue(refLabel)

        mapId = self._getRunLevId(rLev=rLev)
        newMap = self._getMapById(mapId)
        imgRow.setValue(refLabel, newMap)
        copyFile(refImage, newMap)
        # Remember which map id the original reference image got
        self._mapsDict[refImage] = mapId
        imgRow.addToMd(finalModelMd)

        dataFn = self._getFileName('data', iter=lastIt, lev=self._level,
                                   rLev=rLev)
        reader = Table()
        for partRow in reader.iterRows(dataFn, tableName='particles'):
            finalParts.addRow(*partRow._replace(rlnClassNumber=rLev))

    self.writeStar(finalData, finalParts)
    finalModelMd.write('model_classes@' + finalModel)
def mergeClassesStep(self):
    """ Merge the final classes according to the clustering of their maps.

    Without grouping, the raw final data and model files are just copied to
    the final ones. With grouping, the final maps are clustered, every
    particle of the raw final data gets the class number of its cluster
    (label + 1) and one 'input_star' file per new class is written for the
    current level, using the optics table of the first input star file.
    """
    if not self.doGrouping:
        prevData = self._getFileName('rawFinalData')
        finalData = self._getFileName('finalData')
        prevModel = self._getFileName('rawFinalModel')
        finalModel = self._getFileName('finalModel')
        copyFile(prevData, finalData)
        copyFile(prevModel, finalModel)
        return

    from cryomethods.functions import NumpyImgHandler
    npIh = NumpyImgHandler()
    makePath(self._getLevelPath(self._level))
    listVol = self._getFinalMaps()
    matrix = npIh.getAllNpList(listVol, 2)
    labels = self._clusteringData(matrix)

    prevStar = self._getFileName('rawFinalData')
    origStar = self._getFileName('input_star', lev=1, rLev=1)
    opticsTable = Table(fileName=origStar, tableName='optics')
    print("OPTABLE: ", origStar, opticsTable.size())

    # The column names are the same for every output table: read them once
    tableIn = Table(fileName=prevStar, tableName='particles')
    cols = [str(c) for c in tableIn.getColumnNames()]

    # One table per new class. Several old classes can map to the same new
    # class without being consecutive, so rows are accumulated per new class
    # instead of writing a file each time the new class changes (that would
    # overwrite the rows already written for that class).
    mergedTables = {}
    pTable = Table()
    for row in pTable.iterRows(prevStar, key="rlnClassNumber",
                               tableName='particles'):
        newClass = labels[row.rlnClassNumber - 1] + 1
        ouTable = mergedTables.get(newClass)
        if ouTable is None:
            ouTable = Table(columns=cols, tableName='particles')
            mergedTables[newClass] = ouTable
        ouTable.addRow(*row._replace(rlnClassNumber=newClass))

    # Nothing is written when the raw final data has no particles
    for newClass, ouTable in mergedTables.items():
        fn = self._getFileName('input_star', lev=self._level,
                               rLev=newClass)
        print("mergeClassesStep ouTable.size: ", ouTable.size())
        self.writeStar(fn, ouTable, opticsTable)
def test_addRows(self):
    """ Check that addRow appends rows with the same number of values. """
    print("Checking addRows...")

    table = Table()
    table.readStar(StringIO(particles_3d_classify))
    initialCount = len(table)
    previousLast = table[-1]

    values = [
        378.000000, 2826.000000, 5.360625, 4, -87.35289,
        "000100@Extract/job012/Movies/20170629_00021_frameImage.mrcs",
        "MotionCorr/job002/Movies/20170629_00021_frameImage.mrc",
        1, 4.809192, 0.131159, 10864.146484, 10575.793945, 77.995003,
        0.000000, 1.000000, 0.000000, 1, 81.264321, 138.043147, 4.959233,
        -2.12077, 0.798727, 10937.130965, 0.998434, 5
    ]

    # Three rows are added, all with the same value in the fifth position
    for _ in range(3):
        values[4] = initialCount + 1
        table.addRow(*values)

    self.assertEqual(initialCount + 3, len(table))
    currentLast = table[-1]
    self.assertEqual(len(previousLast), len(currentLast))
def _mergeDataStar(self, rLev, callback):
    """ Distribute the particles of the last iteration of a run level.

    Every particle of the 'data' star file is assigned to the map returned
    by callback(iters, rLev, classNumber). Particles of maps flagged in
    self.stopDict are added to the 'rawFinalData' file, the rest to the
    'outputData' file of the current level. Rows already stored in those
    files are kept.

    Params:
        rLev: run level whose last iteration is processed.
        callback: callable taking (iteration, rLev, classNumber) and
            returning the key used to look up the map id in self._mapsDict.
    Raises:
        Exception: if no class of the run level has a registered map id.
    """
    def _getMapId(rMap):
        # Maps that are not registered have no id
        try:
            return self._mapsDict[rMap]
        except (KeyError, TypeError):
            return None

    iters = self._lastIter(rLev)
    # star file to save all particles that continue
    outData = self._getFileName('outputData', lev=self._level)
    # star file to save all final particles
    finalData = self._getFileName('rawFinalData')
    imgStar = self._getFileName('data', iter=iters, lev=self._level,
                                rLev=rLev)
    # The keyword must be 'fileName', as in every other Table read here
    opTable = Table(fileName=imgStar, tableName='optics')
    tableIn = Table(fileName=imgStar, tableName='particles')
    print("IMGSTAR: ", imgStar, "PARTS: ", tableIn.size())
    cols = [str(c) for c in tableIn.getColumnNames()]
    outTable = Table(columns=cols, tableName='particles')
    finalTable = Table(columns=cols, tableName='particles')

    # Keep the rows stored by previous calls
    if os.path.exists(outData):
        print("Exists ", outData)
        tmpTable = Table()
        for row in tmpTable.iterRows(outData, tableName='particles'):
            outTable.addRow(*row)

    if os.path.exists(finalData):
        print("Exists ", finalData)
        tpTable = Table()
        for row in tpTable.iterRows(finalData, tableName='particles'):
            finalTable.addRow(*row)

    pTable = Table()
    for row in pTable.iterRows(imgStar, key="rlnClassNumber",
                               tableName='particles'):
        clsPart = row.rlnClassNumber
        mapId = _getMapId(callback(iters, rLev, clsPart))

        if mapId is None:
            # Fall back to the first class that has a registered map. A
            # single pass is enough: repeating it cannot change the result.
            for clsPart in range(1, self.numberOfClasses.get() + 1):
                mapId = _getMapId(callback(iters, rLev, clsPart))
                if mapId is not None:
                    break
            else:
                raise Exception("No registered map found for any class of "
                                "run level %s" % rLev)

        if self.stopDict[mapId]:
            classId = self._clsIdDict[mapId]
            finalTable.addRow(*row._replace(rlnClassNumber=classId))
        else:
            # The class id is the part after the dot of the map id
            classId = int(mapId.split('.')[1])
            outTable.addRow(*row._replace(rlnClassNumber=classId))

    if finalTable.size() != 0:
        print("finalTable.size: ", finalTable.size())
        self.writeStar(finalData, finalTable)

    if outTable.size() != 0:
        print("outTable.size: ", outTable.size())
        self.writeStar(outData, outTable, opTable)
def convertInputStep(self, movId, partId, postId):
    """ Write the motion star file of every movie and the particles star.

    For each input movie a star file is written with a single-row 'general'
    table, a 'global_shift' table with one row per frame and, when the first
    movie has the '_rlnMotionModelCoeff' attribute, a 'local_motion_model'
    table. The list of movies is written to
    'input_corrected_micrographs.star' and the particles are converted to
    'input_particles.star'.

    Params:
        movId, partId, postId: not used by this method.
    """
    inputMovies = self.inputMovies.get()
    inputParts = self.inputParticles.get()
    imgStar = self._getPath('input_particles.star')
    inputPartsFolder = self._getInputPath('particles')
    pwutils.makePath(inputPartsFolder)

    self.info("Converting set from '%s' into '%s'"
              % (inputParts.getFileName(), imgStar))

    micsTable = Table(columns=['rlnMicrographName',
                               'rlnMicrographMetadata'])
    generalTable = Table(columns=[
        'rlnImageSizeX', 'rlnImageSizeY', 'rlnImageSizeZ',
        'rlnMicrographMovieName', 'rlnMicrographBinning',
        'rlnMicrographOriginalPixelSize', 'rlnMicrographDoseRate',
        'rlnMicrographPreExposure', 'rlnVoltage',
        'rlnMicrographStartFrame', 'rlnMotionModelVersion'
    ])
    shiftsTable = Table(columns=['rlnMicrographFrameNumber',
                                 'rlnMicrographShiftX',
                                 'rlnMicrographShiftY'])
    coeffsTable = Table(columns=['rlnMotionModelCoeffsIdx',
                                 'rlnMotionModelCoeff'])

    # Create the first row, later only the movieName will be updated
    xdim, ydim, ndim = inputMovies.getDim()
    acq = inputMovies.getAcquisition()
    firstMovie = inputMovies.getFirstItem()
    a0, aN = firstMovie.getAlignment().getRange()
    moviesPixelSize = inputMovies.getSamplingRate()
    binningFactor = inputParts.getSamplingRate() / moviesPixelSize
    hasLocal = firstMovie.hasAttribute('_rlnMotionModelCoeff')
    if hasLocal:
        motionMode = 1
    else:
        motionMode = 0

    generalTable.addRow(xdim, ydim, ndim, 'movieName',
                        binningFactor, moviesPixelSize,
                        acq.getDosePerFrame(), acq.getDoseInitial(),
                        acq.getVoltage(), a0, motionMode)
    templateRow = generalTable[0]

    for movie in inputMovies:
        movieFn = movie.getFileName()
        movieBase = os.path.basename(movieFn)
        movieStar = self._getInputPath(
            pwutils.replaceBaseExt(movieFn, 'star'))
        micsTable.addRow(movieBase, movieStar)

        with open(movieStar, 'w') as starFile:
            # Update Movie name
            generalTable[0] = templateRow._replace(
                rlnMicrographMovieName=movieFn)
            generalTable.writeStar(starFile, tableName='general',
                                   singleRow=True)

            # Write shifts
            shiftsTable.clearRows()
            alignment = movie.getAlignment()
            shiftsX, shiftsY = alignment.getShifts()
            a0, aN = alignment.getRange()
            empty = -9999.000

            # Frames before the aligned range
            for frame in range(1, a0):
                shiftsTable.addRow(frame, empty, empty)

            # Adjust the shifts to be relative to the first frame
            # so let's add the opposite value
            xoff, yoff = -shiftsX[0], -shiftsY[0]
            for frame in range(a0, aN + 1):
                index = frame - a0
                shiftsTable.addRow(frame,
                                   shiftsX[index] + xoff,
                                   shiftsY[index] + yoff)

            # Frames after the aligned range
            for frame in range(aN + 1, ndim + 1):
                shiftsTable.addRow(frame, empty, empty)

            shiftsTable.writeStar(starFile, tableName='global_shift')

            # Write coefficients
            if hasLocal:
                coeffs = movie.getAttributeValue('_rlnMotionModelCoeff', '')
                coeffsTable.clearRows()
                for index, coeff in enumerate(json.loads(coeffs)):
                    coeffsTable.addRow(index, coeff)
                coeffsTable.writeStar(starFile,
                                      tableName='local_motion_model')

    micsStar = self._getPath('input_corrected_micrographs.star')
    with open(micsStar, 'w') as starFile:
        micsTable.writeStar(starFile)

    convert.writeSetOfParticles(inputParts, imgStar,
                                outputDir=inputPartsFolder,
                                alignType=ALIGN_PROJ,
                                fillMagnification=True)
def run_job(args):
    """ Run crYOLO picking and write the files Relion expects.

    The function writes the crYOLO config file, runs the prediction command,
    moves the coordinate files found in the crYOLO output next to the job
    folder structure, writes autopick.star and job_pipeline.star, registers
    the output node and, if output_for_relion.star does not exist yet,
    writes the suggested diameter/box sizes and .gui_manualpickjob.star.

    Params:
        args: parsed arguments; the attributes in_mics, out_dir, threshold,
            box_size, model, filament, denoise, gpu and threads are read,
            plus box_distance and minimum_number_boxes when filament is set.
    Raises:
        Exception: if the command fails, or if the crYOLO size distribution
            summary (or its MEAN entry) cannot be found.
    """
    start = time.time()
    in_mics = args.in_mics
    job_dir = args.out_dir
    thresh = args.threshold
    box_size = args.box_size
    distance = 0
    model = args.model
    filament = args.filament
    if filament:
        box_dist = args.box_distance
        min_boxes = args.minimum_number_boxes
    denoise = args.denoise
    gpus = args.gpu
    threads = args.threads

    if SCRATCH_DIR is not None:
        filtered_dir = os.path.join(SCRATCH_DIR, "filtered_tmp")
    else:
        filtered_dir = "%s/filtered_tmp/" % job_dir

    if model == "None":
        model = CRYOLO_GEN_MODEL if not denoise else CRYOLO_GEN_JANNI_MODEL
    else:
        model = os.path.abspath(model)

    # Making a cryolo config file
    config_fn = os.path.join(job_dir, "config_cryolo.json")
    json_dict = {
        "model": {
            "architecture": "PhosaurusNet",
            "input_size": 1024,
            "max_box_per_image": 600,
            "filter": [0.1, filtered_dir]
        },
        "other": {
            "log_path": "%s/logs/" % job_dir
        }
    }

    if box_size:  # is not 0
        json_dict["model"]["anchors"] = [int(box_size), int(box_size)]
        if not filament:
            distance = int(box_size / 2)  # use half the box_size

    if denoise:
        json_dict["model"]["filter"] = [
            CRYOLO_JANNI_MODEL, 24, 3, filtered_dir
        ]

    if DEBUG:
        print("Using following config: ", json_dict)

    with open(config_fn, "w") as json_file:
        json.dump(json_dict, json_file, indent=4)

    # Reading the micrographs star file from Relion
    mictable = Table(fileName=in_mics, tableName='micrographs')
    mic_fns = mictable.getColumnValues("rlnMicrographName")

    # Launching cryolo
    args_dict = {
        '--conf': config_fn,
        '--input': in_mics,
        '--output': os.path.join(job_dir, 'output'),
        '--weights': model,
        '--gpu': gpus.replace(',', ' '),
        '--threshold': thresh,
        '--distance': distance,
        '--cleanup': "",
        '--skip': "",
        '--write_empty': "",
        '--num_cpu': -1 if threads == 1 else threads
    }

    if filament:
        args_dict.update({
            '--filament': "",
            '--box_distance': box_dist,
            '--minimum_number_boxes': min_boxes,
            '--directional_method': 'PREDICTED'
        })
        args_dict.pop('--distance')

    # NOTE(review): the argument values are interpolated unquoted into a
    # shell command line; paths with spaces or shell characters will break
    # it. The shell is needed for the "&&" after the environment activation.
    cmd = "%s && %s " % (CONDA_ENV, CRYOLO_PREDICT)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d"
                        % proc.returncode)

    # Moving output star files for Relion to use
    autopick_fn = os.path.join(job_dir, "autopick.star")
    table_coords = Table(
        columns=['rlnMicrographName', 'rlnMicrographCoordinates'])
    star_dir = "EMAN_HELIX_SEGMENTED" if filament else "STAR"
    ext = ".box" if filament else ".star"
    with open(autopick_fn, "w") as mics_star:
        for mic in mic_fns:
            mic_base = os.path.basename(mic)
            mic_root = os.path.splitext(mic_base)[0]
            mic_dir = os.path.dirname(mic)
            dir_parts = mic_dir.split("/")
            if len(dir_parts) > 1 and "job" in dir_parts[1]:
                # remove JobType/jobXXX
                mic_dir = "/".join(dir_parts[2:])
            os.makedirs(os.path.join(job_dir, mic_dir), exist_ok=True)
            coord_cryolo = os.path.join(job_dir, "output", star_dir,
                                        mic_root + ext)
            coord_relion = os.path.join(job_dir, mic_dir,
                                        mic_root + "_autopick" + ext)
            # Only micrographs with a crYOLO output file are listed
            if os.path.exists(coord_cryolo):
                os.rename(coord_cryolo, coord_relion)
                table_coords.addRow(mic, coord_relion)
                if DEBUG:
                    print("Moved %s to %s" % (coord_cryolo, coord_relion))
        table_coords.writeStar(mics_star, tableName='coordinate_files')

    # Required output to mini pipeline job_pipeline.star file
    pipeline_fn = os.path.join(job_dir, "job_pipeline.star")
    table_gen = Table(columns=['rlnPipeLineJobCounter'])
    table_gen.addRow(2)
    table_proc = Table(columns=[
        'rlnPipeLineProcessName', 'rlnPipeLineProcessAlias',
        'rlnPipeLineProcessTypeLabel', 'rlnPipeLineProcessStatusLabel'
    ])
    table_proc.addRow(job_dir, 'None', 'relion.external', 'Running')
    table_nodes = Table(
        columns=['rlnPipeLineNodeName', 'rlnPipeLineNodeTypeLabel'])
    table_nodes.addRow(in_mics, "MicrographsData.star.relion")
    table_nodes.addRow(autopick_fn,
                       "MicrographsCoords.star.relion.autopick")
    table_input = Table(
        columns=['rlnPipeLineEdgeFromNode', 'rlnPipeLineEdgeProcess'])
    table_input.addRow(in_mics, job_dir)
    table_output = Table(
        columns=['rlnPipeLineEdgeProcess', 'rlnPipeLineEdgeToNode'])
    table_output.addRow(job_dir, autopick_fn)

    with open(pipeline_fn, "w") as f:
        table_gen.writeStar(f, tableName="pipeline_general", singleRow=True)
        table_proc.writeStar(f, tableName="pipeline_processes")
        table_nodes.writeStar(f, tableName="pipeline_nodes")
        table_input.writeStar(f, tableName="pipeline_input_edges")
        table_output.writeStar(f, tableName="pipeline_output_edges")

    # Register output nodes in .Nodes/
    nodes_dir = os.path.join(".Nodes", "MicrographsCoords", job_dir)
    os.makedirs(nodes_dir, exist_ok=True)
    open(os.path.join(nodes_dir, "autopick.star"), "w").close()

    outputFn = os.path.join(job_dir, "output_for_relion.star")
    if not os.path.exists(outputFn):
        # get estimated box size
        summaryfn = os.path.join(job_dir, "output/DISTR",
                                 'size_distribution_summary*.txt')
        summaries = glob(summaryfn)
        if not summaries:
            raise Exception("Size distribution summary not found: %s"
                            % summaryfn)
        estim_sizepx = None
        with open(summaries[0]) as f:
            for line in f:
                if line.startswith("MEAN,"):
                    estim_sizepx = int(line.split(",")[-1])
                    break
        if estim_sizepx is None:
            raise Exception("No MEAN entry found in %s" % summaries[0])
        print("\ncrYOLO estimated box size %d px" % estim_sizepx)

        # calculate diameter, original (boxSize) and downsampled
        # (boxSizeSmall) box
        optics = Table(fileName=in_mics, tableName='optics')
        angpix = float(optics[0].rlnMicrographPixelSize)

        if filament:
            # box size = 1.5x tube diam
            diam = 0.66 * box_size
        else:
            # use + 20% for diameter
            diam = math.ceil(estim_sizepx * angpix * 1.2)
        # use +30% for box size, make it even
        boxSize = 1.3 * estim_sizepx
        boxSize = math.ceil(boxSize / 2.) * 2

        # from relion_it.py script
        # Authors: Sjors H.W. Scheres, Takanori Nakane & Colin M. Palmer
        for box in (48, 64, 96, 128, 160, 192, 256, 288, 300, 320,
                    360, 384, 400, 420, 450, 480, 512, 640, 768,
                    896, 1024):
            # Don't go larger than the original box
            if box > boxSize:
                boxSizeSmall = boxSize
                break
            # If Nyquist freq. is better than 7.5 A, use this
            # downscaled box, otherwise continue to next size up
            small_box_angpix = angpix * boxSize / box
            if small_box_angpix < 3.75:
                boxSizeSmall = box
                break
        else:
            # No listed size was suitable: keep the original box size
            boxSizeSmall = boxSize

        print(
            "\nSuggested parameters:\n\tDiameter (A): %d\n\tBox size (px): %d\n"
            "\tBox size binned (px): %d" % (diam, boxSize, boxSizeSmall))

        # output all params into a star file
        tableCryolo = Table(columns=[
            'rlnParticleDiameter', 'rlnOriginalImageSize', 'rlnImageSize'
        ])
        tableCryolo.addRow(diam, boxSize, boxSizeSmall)
        with open(outputFn, "w") as f:
            tableCryolo.writeStar(f, tableName='picker')

        # create .gui_manualpickjob.star for easy display
        # NOTE(review): the line breaks of this template were reconstructed
        # (one entry per line); confirm against the original layout.
        starString = """
# version 30001

data_job

_rlnJobTypeLabel relion.manualpick%s
_rlnJobIsContinue 0
_rlnJobIsTomo 0

# version 30001

data_joboptions_values

loop_
_rlnJobOptionVariable #1
_rlnJobOptionValue #2
angpix %f
black_val 0
blue_value 0
color_label rlnParticleSelectZScore
diameter %d
do_color No
do_fom_threshold No
do_queue No
do_startend No
fn_color ""
fn_in ""
highpass -1
lowpass 20
micscale 0.2
min_dedicated 1
minimum_pick_fom 0
other_args ""
qsub qsub
qsubscript /public/EM/RELION/relion/bin/relion_qsub.csh
queuename openmpi
red_value 2
sigma_contrast 3
white_val 0
"""
        label = ".helical" if filament else ""
        with open(".gui_manualpickjob.star", "w") as f:
            f.write(starString % (label, angpix, diam))

    end = time.time()
    diff = end - start
    print("Job duration = %dh %dmin %dsec \n"
          % (diff // 3600, diff // 60 % 60, diff % 60))
def convertInputStep(self, resetDeps, copyAlignment):
    """ Create the input files in STAR format for the current level.

    Level 0: the input is converted and random subsets of its particles
    are written for the run levels 9 down to 2 (each subset is sampled from
    the previous one).
    Level 1: the input star file and the input volumes are converted.
    Other levels: the 'outputData' file of the previous level is split into
    one input star file per class number, using the optics table of the
    first input star file.

    Params:
        resetDeps: not used by this method.
        copyAlignment: passed to _convertStar.
    """
    import random

    if self._level == 0:
        makePath(self._getRunPath(self._level, 1))
        imgStar = self._getFileName('input_star', lev=self._level, rLev=0)
        self._convertStar(copyAlignment, imgStar)
        opticsTable = Table(fileName=imgStar, tableName='optics')
        partsTable = Table(fileName=imgStar, tableName='particles')
        self._convertVol(ImageHandler(), self.inputVolumes.get())
        mdSize = partsTable.size()

        for i in range(9, 1, -1):
            makePath(self._getRunPath(self._level, i))
            mStar = self._getFileName('input_star', lev=self._level, rLev=i)
            # 10000 particles per step for big sets, 10% per step otherwise
            size = 10000 * i if mdSize >= 100000 else int(mdSize * 0.1 * i)
            print("partsTable: ", size, i, mdSize)
            partsTable._rows = random.sample(partsTable._rows, k=size)
            self.writeStar(mStar, partsTable, opticsTable)

    elif self._level == 1:
        imgStar = self._getFileName('input_star', lev=self._level, rLev=1)
        makePath(self._getRunPath(self._level, 1))
        self._convertStar(copyAlignment, imgStar)

        # find a clever way to avoid volume conversion if its already done.
        self._convertVol(ImageHandler(), self.inputVolumes.get())

    else:
        lastCls = None
        prevStar = self._getFileName('outputData', lev=self._level - 1)
        firstStarFn = self._getFileName('input_star', lev=1, rLev=1)
        opTable = Table(fileName=firstStarFn, tableName='optics')
        tableIn = Table(fileName=prevStar, tableName='particles')
        cols = [str(c) for c in tableIn.getColumnNames()]

        pTable = Table()
        for row in pTable.iterRows(prevStar, key="rlnClassNumber",
                                   tableName='particles'):
            clsPart = row.rlnClassNumber
            if clsPart != lastCls:
                # A new class starts: flush the table of the previous one
                if lastCls is not None:
                    print("writing %s" % fn)
                    self.writeStar(fn, newPTable, opTable)
                paths = self._getRunPath(self._level, clsPart)
                makePath(paths)
                print("Path: %s and newRlev: %d" % (paths, clsPart))
                lastCls = clsPart
                newPTable = Table(columns=cols, tableName='particles')
                fn = self._getFileName('input_star', lev=self._level,
                                       rLev=clsPart)
            newPTable.addRow(*row)

        # Write the table of the last class (if there was any particle)
        if lastCls is not None:
            print("writing %s and ending the loop" % fn)
            self.writeStar(fn, newPTable, opTable)
def run_job(project_dir, args):
    """ Fine-tune a crYOLO model with the particles of a Relion star file.

    The function creates the crYOLO image/annotation folders, writes the
    config file, links the micrographs and writes one box file per
    micrograph, runs the training command and writes job_pipeline.star.

    Params:
        project_dir: folder used as prefix for the paths built with getPath.
        args: parsed arguments; the attributes in_parts, out_dir, model and
            gpu are read.
    Raises:
        Exception: if the training command returns a non-zero code.
    """
    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    model = args.model or CRYOLO_GEN_MODEL
    gpus = args.gpu

    def getPath(*arglist):
        return os.path.join(project_dir, *arglist)

    if SCRATCH_DIR is not None:
        filtered_dir = os.path.join(SCRATCH_DIR, "filtered_tmp")
    else:
        filtered_dir = "%s/filtered_tmp/" % job_dir

    # Create folder structure for cryolo
    os.mkdir(IMG_FOLDER)
    os.mkdir(ANNOT_FOLDER)

    # Reading the box size from relion
    optics = Table(fileName=getPath(in_parts), tableName='optics')[0]
    box_bin = int(optics.rlnImageSize)
    # True division keeps fractional binning factors (floor division of the
    # two floats would truncate them); the result is an integer number of
    # pixels, as used for the anchors and the box files.
    binning = (float(optics.rlnImagePixelSize)
               / float(optics.rlnMicrographOriginalPixelSize))
    box_size = int(round(binning * box_bin))
    print("Using unbinned box size of %d px" % box_size)

    # Making a cryolo config file
    json_dict = {
        "model": {
            "architecture": "PhosaurusNet",
            "input_size": 1024,
            "max_box_per_image": 600,
            "anchors": [box_size, box_size],
            "filter": [
                0.1,
                filtered_dir
            ]
        },
        "train": {
            "train_image_folder": IMG_FOLDER,
            "train_annot_folder": ANNOT_FOLDER,
            "train_times": 10,
            "batch_size": 6,
            "learning_rate": 0.0001,
            "nb_epoch": 200,
            "object_scale": 5.0,
            "no_object_scale": 1.0,
            "coord_scale": 1.0,
            "class_scale": 1.0,
            "pretrained_weights": "%s" % model,
            "saved_weights_name": getPath(job_dir, TUNE_MODEL),
            "debug": True
        },
        "valid": {
            "valid_image_folder": "",
            "valid_annot_folder": "",
            "valid_times": 1
        }
    }

    if DEBUG:
        print("Using following config: ", json_dict)

    with open("config_cryolo.json", "w") as json_file:
        json.dump(json_dict, json_file, indent=4)

    # Reading the particles from relion
    try:
        parttable = Table(fileName=getPath(in_parts), tableName='particles')
    except Exception:
        print("Could not read particles table from %s. Stopping" % in_parts)
        return

    # Arranging files for cryolo: making symlinks for mics and
    # creating box files
    mics_dict = {}
    for row in parttable:
        # Box files store the corner of the box, not its center
        xCoord = int(int(row.rlnCoordinateX) - box_size / 2)
        yCoord = int(int(row.rlnCoordinateY) - box_size / 2)
        mics_dict.setdefault(row.rlnMicrographName, []).append(
            (xCoord, yCoord))

    for mic, coordsList in mics_dict.items():
        micSrc = getPath(mic)
        micDst = getPath(job_dir, IMG_FOLDER, os.path.basename(mic))
        if not os.path.exists(micDst):
            os.symlink(micSrc, micDst)
            if DEBUG:
                print("Link %s --> %s" % (micSrc, micDst))

        box = os.path.splitext(micDst)[0] + ".box"
        box = box.replace(IMG_FOLDER, ANNOT_FOLDER)
        with open(box, "w") as f:
            for coords in coordsList:
                f.write("%s\t%s\t%s\t%s\n"
                        % (coords[0], coords[1], box_size, box_size))
        if DEBUG:
            print("Created box file: %s" % box)

    # Launching cryolo
    args_dict = {
        '--conf': "config_cryolo.json",
        '--gpu': gpus.replace(',', ' '),
        '--warmup': 0,
        '--fine_tune': "",
        '--cleanup': ""
    }
    cmd = "%s && %s " % (CONDA_ENV, CRYOLO_TRAIN)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d"
                        % proc.returncode)

    # Required output job_pipeline.star file
    pipeline_fn = getPath(job_dir, "job_pipeline.star")
    table_gen = Table(columns=['rlnPipeLineJobCounter'])
    table_gen.addRow(2)
    table_proc = Table(columns=['rlnPipeLineProcessName',
                                'rlnPipeLineProcessAlias',
                                'rlnPipeLineProcessTypeLabel',
                                'rlnPipeLineProcessStatusLabel'])
    table_proc.addRow(job_dir, 'None', 'relion.external', 'Running')
    table_nodes = Table(columns=['rlnPipeLineNodeName',
                                 'rlnPipeLineNodeTypeLabel'])
    table_nodes.addRow(in_parts, "ParticlesData.star.relion")
    table_input = Table(columns=['rlnPipeLineEdgeFromNode',
                                 'rlnPipeLineEdgeProcess'])
    table_input.addRow(in_parts, job_dir)

    with open(pipeline_fn, "w") as f:
        table_gen.writeStar(f, tableName="pipeline_general", singleRow=True)
        table_proc.writeStar(f, tableName="pipeline_processes")
        table_nodes.writeStar(f, tableName="pipeline_nodes")
        table_input.writeStar(f, tableName="pipeline_input_edges")

    end = time.time()
    diff = end - start
    print("Job duration = %dh %dmin %dsec \n"
          % (diff // 3600, diff // 60 % 60, diff % 60))
def run_job(project_dir, args):
    """ Select good 2D classes with cryoassess and write their particles.

    The class averages stack is taken from the model star file that matches
    the input data star file. After running the command, the class ids are
    parsed from the names of the images in output/Good, the particles of
    those classes are written to particles_for_training.star and a
    backup_selection.star file is written with one row per class.
    If no good class is found, the Relion failure file is created and the
    process exits with code 1.

    Params:
        project_dir: folder used as prefix for the paths built with getPath.
        args: parsed arguments; the attributes in_parts, out_dir, batch_size
            and gpu are read.
    Raises:
        Exception: if the command returns a non-zero code.
    """
    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    batch = args.batch_size
    gpu = args.gpu

    def getPath(*arglist):
        return os.path.join(project_dir, *arglist)

    # Reading the model star file from relion
    modelstar = in_parts.replace("_data.star", "_model.star")
    refstable = Table(fileName=getPath(modelstar), tableName='model_classes')
    refstack = refstable[0].rlnReferenceImage.split("@")[-1]
    # The index before the "@" of the last reference is the number of classes
    nrCls = int(refstable[-1].rlnReferenceImage.split("@")[0])

    if DEBUG:
        print("Found input class averages stack: %s" % refstack)

    # Launching cryoassess
    args_dict = {
        '-i': getPath(refstack),
        '-o': getPath(job_dir, 'output'),
        '-b': batch,
        '-m': CRYOASSESS_2D_MODEL,
    }
    cmd = "%s && CUDA_VISIBLE_DEVICES=%s %s " % (CONDA_ENV, gpu,
                                                 CRYOASSESS_2D)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d"
                        % proc.returncode)

    # Parse output to get good classes IDs
    goodTemplate = getPath(job_dir, "output/Good/particle_*.jpg")
    regex = re.compile(r'particle_(\d*)\.jpg')
    goodcls = []
    for fn in glob(goodTemplate):
        match = regex.search(fn)
        # Files without a numeric id in their name are ignored
        if match and match.group(1):
            goodcls.append(int(match.group(1)))

    if DEBUG:
        print("Parsing output files: %s\nGood classes: %s"
              % (goodTemplate, goodcls))

    if len(goodcls) == 0:
        print("No good classes found. Job stopped.")
        end = time.time()
        diff = end - start
        print("Job duration = %dh %dmin %dsec \n"
              % (diff // 3600, diff // 60 % 60, diff % 60))
        open(RELION_JOB_FAILURE_FILENAME, "w").close()
        exit(1)

    # Membership is tested once per particle: use a set for the lookups
    goodset = set(goodcls)

    # Create output star file for Relion to use
    optics = Table(fileName=getPath(in_parts), tableName='optics')
    ptcls = Table(fileName=getPath(in_parts), tableName='particles')
    cols = ptcls.getColumnNames()
    out_ptcls = Table(columns=cols)

    for row in ptcls:
        if row.rlnClassNumber in goodset:
            out_ptcls.addRow(*row)

    if DEBUG:
        print("Input particles: %d\nOutput particles: %d"
              % (len(ptcls), len(out_ptcls)))

    out_star = getPath(job_dir, "particles_for_training.star")
    with open(out_star, "w") as f:
        optics.writeStar(f, tableName="optics")
        out_ptcls.writeStar(f, tableName="particles")

    # Create backup_selection.star for results visualization
    sel = Table(columns=['rlnSelected'])
    for i in range(1, nrCls + 1):
        sel.addRow(1 if i in goodset else 0)
    with open(getPath("backup_selection.star"), "w") as f:
        sel.writeStar(f, tableName="")

    end = time.time()
    diff = end - start
    print("Job duration = %dh %dmin %dsec \n"
          % (diff // 3600, diff // 60 % 60, diff % 60))
def convertInputStep(self, movId, partId, postId):
    """ Write the micrographs star, the motion star files and the particles.

    The input movies are written as a set of micrographs with their optics
    groups. For each movie a star file is written with a single-row
    'general' table, a 'global_shift' table with one row per frame and,
    when the movie has motion model coefficients, a 'local_motion_model'
    table. Finally the particles are converted to the 'input_particles'
    file.

    Params:
        movId, partId, postId: not used by this method.
    """
    inputMovies = self.inputMovies.get()
    inputParts = self.inputParticles.get()
    imgStar = self._getFileName('input_particles')
    inputPartsFolder = self._getInputPath('particles')
    pwutils.makePath(inputPartsFolder)

    self.info("Converting set from '%s' into '%s'"
              % (inputParts.getFileName(), imgStar))

    generalTable = Table(columns=[
        'rlnImageSizeX', 'rlnImageSizeY', 'rlnImageSizeZ',
        'rlnMicrographMovieName', 'rlnMicrographBinning',
        'rlnMicrographOriginalPixelSize', 'rlnMicrographDoseRate',
        'rlnMicrographPreExposure', 'rlnVoltage',
        'rlnMicrographStartFrame', 'rlnMotionModelVersion',
        'rlnMicrographGainName', 'rlnMicrographDefectFile'
    ])
    shiftsTable = Table(columns=['rlnMicrographFrameNumber',
                                 'rlnMicrographShiftX',
                                 'rlnMicrographShiftY'])
    coeffsTable = Table(columns=['rlnMotionModelCoeffsIdx',
                                 'rlnMotionModelCoeff'])

    # Create the first row, later only the movieName will be updated
    xdim, ydim, ndim = inputMovies.getDim()
    acq = inputMovies.getAcquisition()
    firstMovie = inputMovies.getFirstItem()
    a0, aN = firstMovie.getAlignment().getRange()
    moviesPixelSize = inputMovies.getSamplingRate()
    binningFactor = inputParts.getSamplingRate() / moviesPixelSize

    og = convert.OpticsGroups.fromImages(inputMovies)
    writer = convert.createWriter(optics=og)
    writer.writeSetOfMicrographs(inputMovies,
                                 self._getFileName('input_mics'),
                                 postprocessImageRow=self._updateMic)

    generalTable.addRow(xdim, ydim, ndim, 'movieName',
                        binningFactor, moviesPixelSize,
                        acq.getDosePerFrame(), acq.getDoseInitial(),
                        acq.getVoltage(), a0, 0, '""', '""')
    templateRow = generalTable[0]

    for movie in inputMovies:
        movieStar = self._getMovieStar(movie)
        ogId = movie.getAttributeValue('_rlnOpticsGroup', 1)
        gainFn = og[ogId].get('rlnMicrographGainName', None)
        defectFn = og[ogId].get('rlnMicrographDefectFile', None)

        with open(movieStar, 'w') as starFile:
            coeffs = json.loads(
                movie.getAttributeValue('_rlnMotionModelCoeff', '[]'))
            if coeffs:
                motionMode = 1
            else:
                motionMode = 0

            # Update some params in the general table
            newValues = {'rlnMicrographMovieName': movie.getFileName(),
                         'rlnMotionModelVersion': motionMode}
            # Gain and defect files are only set when the optics group
            # defines them
            if gainFn:
                newValues['rlnMicrographGainName'] = gainFn
            if defectFn:
                newValues['rlnMicrographDefectFile'] = defectFn

            generalTable[0] = templateRow._replace(**newValues)
            generalTable.writeStar(starFile, tableName='general',
                                   singleRow=True)

            # Write shifts
            shiftsTable.clearRows()
            alignment = movie.getAlignment()
            shiftsX, shiftsY = alignment.getShifts()
            a0, aN = alignment.getRange()
            empty = -9999.000

            # Frames before the aligned range
            for frame in range(1, a0):
                shiftsTable.addRow(frame, empty, empty)

            # Adjust the shifts to be relative to the first frame
            # so let's add the opposite value
            xoff, yoff = -shiftsX[0], -shiftsY[0]
            for frame in range(a0, aN + 1):
                index = frame - a0
                shiftsTable.addRow(frame,
                                   shiftsX[index] + xoff,
                                   shiftsY[index] + yoff)

            # Frames after the aligned range
            for frame in range(aN + 1, ndim + 1):
                shiftsTable.addRow(frame, empty, empty)

            shiftsTable.writeStar(starFile, tableName='global_shift')

            # Write coefficients
            coeffsTable.clearRows()
            if coeffs:
                for index, coeff in enumerate(coeffs):
                    coeffsTable.addRow(index, coeff)
                coeffsTable.writeStar(starFile,
                                      tableName='local_motion_model')

    convert.writeSetOfParticles(inputParts, imgStar,
                                outputDir=inputPartsFolder,
                                alignType=ALIGN_PROJ,
                                fillMagnification=True)
def run_job(project_dir, args):
    """ Select good 2D classes with cinderella and write their particles.

    The class averages stack is taken from the model star file that matches
    the input data star file. After running the command, the confidence
    file is read until the first line whose value is not above the
    threshold; the class ids of the lines read so far (index + 1) are the
    good classes. Their particles are written to
    particles_for_training.star and a backup_selection.star file is written
    with one row per class. If no good class is found, the Relion failure
    file is created and the process exits with code 1.

    Params:
        project_dir: folder used as prefix for the paths built with getPath.
        args: parsed arguments; the attributes in_parts, out_dir, threshold,
            model and gpu are read.
    Raises:
        Exception: if the command returns a non-zero code.
    """
    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    thresh = args.threshold
    model = args.model
    gpus = args.gpu

    def getPath(*arglist):
        return os.path.join(project_dir, *arglist)

    if model == "None":
        model = CINDERELLA_GEN_MODEL
    else:
        model = getPath(model)

    # Reading the model star file from relion
    modelstar = in_parts.replace("_data.star", "_model.star")
    refstable = Table(fileName=getPath(modelstar), tableName='model_classes')
    refstack = refstable[0].rlnReferenceImage.split("@")[-1]
    # The index before the "@" of the last reference is the number of classes
    nrCls = int(refstable[-1].rlnReferenceImage.split("@")[0])

    if DEBUG:
        print("Found input class averages stack: %s" % refstack)

    # Launching cinderella
    args_dict = {
        '-i': getPath(refstack),
        '-o': 'output',
        '-w': model,
        '--gpu': gpus,
        '-t': thresh,
    }
    cmd = "%s && %s " % (CONDA_ENV, CINDERELLA_PREDICT)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d"
                        % proc.returncode)

    # Parse output to get good classes IDs
    outfn = os.path.basename(
        refstack.replace(".mrcs", "_index_confidence.txt"))
    outpath = getPath(job_dir, "output", outfn)
    goodcls = []
    with open(outpath, "r") as f:
        for line in f:
            fields = line.split()
            # Reading stops at the first value that is not above the
            # threshold
            if float(fields[1]) <= thresh:
                break
            # Indices in the file start at 0, class numbers at 1
            goodcls.append(int(fields[0]) + 1)

    if DEBUG:
        print("Parsing output file: %s\nGood classes: %s"
              % (outpath, goodcls))

    if len(goodcls) == 0:
        print("No good classes found. Job stopped.")
        end = time.time()
        diff = end - start
        print("Job duration = %dh %dmin %dsec \n"
              % (diff // 3600, diff // 60 % 60, diff % 60))
        open(RELION_JOB_FAILURE_FILENAME, "w").close()
        exit(1)

    # Membership is tested once per particle: use a set for the lookups
    goodset = set(goodcls)

    # Create output star file for Relion to use
    optics = Table(fileName=getPath(in_parts), tableName='optics')
    ptcls = Table(fileName=getPath(in_parts), tableName='particles')
    cols = ptcls.getColumnNames()
    out_ptcls = Table(columns=cols)

    for row in ptcls:
        if row.rlnClassNumber in goodset:
            out_ptcls.addRow(*row)

    if DEBUG:
        print("Input particles: %d\nOutput particles: %d"
              % (len(ptcls), len(out_ptcls)))

    out_star = getPath(job_dir, "particles_for_training.star")
    with open(out_star, "w") as f:
        optics.writeStar(f, tableName="optics")
        out_ptcls.writeStar(f, tableName="particles")

    # Create backup_selection.star for results visualization
    sel = Table(columns=['rlnSelected'])
    for i in range(1, nrCls + 1):
        sel.addRow(1 if i in goodset else 0)
    with open(getPath("backup_selection.star"), "w") as f:
        sel.writeStar(f, tableName="")

    end = time.time()
    diff = end - start
    print("Job duration = %dh %dmin %dsec \n"
          % (diff // 3600, diff // 60 % 60, diff % 60))