def createOutputStep(self):
    """Clean the expanded STAR file and register it as output particles.

    The 'expanded_particles.star' file in the extra folder is re-written
    without the rlnImageId column and then parsed into a new particle set
    that is linked to the input particles.
    """
    inputSet = self.inputParticles.get()
    outputSet = self._createSetOfParticles()
    outputSet.copyInfo(inputSet)

    starFn = self._getExtraPath('expanded_particles.star')

    # remove repeating rlnImageId column
    if Plugin.IS_GT30():
        # newer files keep the particles in a named table next to 'optics'
        particlesName = 'particles'
        opticsTable = Table(fileName=starFn, tableName='optics')
    else:
        particlesName = ''

    particlesTable = Table(fileName=starFn, tableName=particlesName)
    particlesTable.removeColumns("rlnImageId")

    # Both tables are already in memory, so the file can be overwritten
    with open(starFn, "w") as starFile:
        particlesTable.writeStar(starFile, tableName=particlesName)
        if Plugin.IS_GT30():
            opticsTable.writeStar(starFile, tableName='optics')

    starReader = convert.createReader()
    starReader.readSetOfParticles(
        starFn,
        outputSet,
        alignType=ALIGN_PROJ,
        postprocessImageRow=self._postprocessImageRow)

    self._defineOutputs(outputParticles=outputSet)
    self._defineSourceRelation(inputSet, outputSet)
def _showChanges(self, paramName=None):
    """Collect the per-iteration change values and show them as a table.

    For every iteration whose 'optimiser' file exists, the values of the
    protocol's CHANGE_LABELS are read from the first row of the
    'optimiser_general' table and appended, together with the iteration
    number, to a table that is written to the 'all_changes' file.

    :param paramName: not used by this method.
    :return: list with a single data view of the written file.
    """
    changeLabels = self.protocol.CHANGE_LABELS
    tableChanges = Table(columns=['rlnIterationNumber'] + changeLabels)

    print("Computing average changes in offset, angles, and class membership")

    for it in self._getAllIters():
        # The file name is resolved once per iteration and reused below
        fn = self.protocol._getFileName('optimiser', iter=it)
        if not os.path.exists(fn):
            continue
        print("Computing data for iteration; %03d" % it)
        row = Table(fileName=fn, tableName='optimiser_general')[0]
        cols = [getattr(row, label) for label in changeLabels]
        tableChanges.addRow(it, *cols)

    fn = self.protocol._getFileName('all_changes')
    with open(fn, 'w') as f:
        tableChanges.writeStar(f)

    return [self.createDataView(fn)]
def _showPMax(self, paramName=None):
    """Tabulate and plot the average PMax for every iteration but the first.

    The values come from the first row of the 'model_general' table of each
    iteration's model file; the resulting table is written to the
    'all_avgPmax' file.

    :param paramName: not used by this method.
    :return: list with the data view of the written file and the plotter.
    """
    columns = ['rlnIterationNumber', 'rlnAveragePmax', 'rlnLogLikelihood']
    pmaxTable = Table(columns=columns)

    # skip iter1 with Pmax=1
    for iteration in (i for i in self._getAllIters() if i != 1):
        # always list all iterations
        modelKey = self.protocol.PREFIXES[0] + 'model'
        modelFn = self.protocol._getFileName(modelKey, iter=iteration)
        general = Table(fileName=modelFn, tableName='model_general')[0]
        pmaxTable.addRow(int(iteration),
                         float(general.rlnAveragePmax),
                         float(general.rlnLogLikelihood))

    outFn = self.protocol._getFileName('all_avgPmax')
    with open(outFn, 'w') as outFile:
        pmaxTable.writeStar(outFile)

    plotter = RelionPlotter()
    plotter.createSubPlot("Avg PMax per Iterations", "Iterations", "Avg PMax")
    plotter.plotMd(pmaxTable, 'rlnIterationNumber', 'rlnAveragePmax')
    plotter.showLegend(['rlnAveragePmax'])

    return [self.createDataView(outFn), plotter]
def _write(self, f):
    """Write all stored rows to *f* as the 'optics' table.

    The column names and types are taken from the first stored row.
    """
    # Create columns from the first row
    columns = []
    for name, value in self.first()._asdict().items():
        columns.append(Table.Column(name, type(value)))

    opticsTable = Table(columns=columns)
    for group in self._dict.values():
        opticsTable.addRow(*group)

    opticsTable.writeStar(f, tableName='optics')
def convertInputStep(self, newMics, numPass):
    """ Create a star file as expected by cryoassess.

    One row with the absolute file name is written per micrograph in
    *newMics*; the file for this pass is then appended to the total input
    star via appendTotalInputStar.
    """
    table = Table(columns=['rlnMicrographName'])
    for micrograph in newMics:
        absPath = os.path.abspath(micrograph.getFileName())
        table.addRow(absPath)

    starFn = self.getInputFilename(numPass)
    with open(starFn, 'w') as starFile:
        starFile.write("# Star file generated with Scipion\n")
        table.writeStar(starFile, tableName='')

    self.appendTotalInputStar(numPass)
def test_write_singleRow(self):
    # Exercises writeStar(..., singleRow=True) twice: once for a table
    # parsed from text (echoed to stdout) and once for a table built in
    # memory (written to a file).
    outputFn = '/tmp/test-single-row.star'
    print("Writing a single row to %s..." % outputFn)

    shiftsTable = Table()
    shiftsTable.readStar(StringIO(one_micrograph_mc),
                         tableName='global_shift')
    shiftsTable.writeStar(sys.stdout, tableName='global_shift',
                          singleRow=True)

    columns = ['rlnImageSizeX', 'rlnImageSizeY', 'rlnMicrographMovieName']
    movieTable = Table(columns=columns)
    movieTable.addRow(
        3710, 3838,
        'Movies/14sep05c_00024sq_00003hl_00002es.frames.out.mrc')

    with open(outputFn, 'w') as outputFile:
        movieTable.writeStar(outputFile, singleRow=True)
def convertInputStep(self, movId, partId, postId):
    """Write the STAR files needed as input: micrographs, one motion file
    per movie, and the particles.

    For every input movie a STAR file is written with three tables:
    'general' (single row), 'global_shift' (one row per frame) and, only
    when the movie has motion-model coefficients, 'local_motion_model'.

    The movId, partId and postId parameters are not used in the body.
    """
    inputMovies = self.inputMovies.get()
    inputParts = self.inputParticles.get()
    imgStar = self._getFileName('input_particles')
    inputPartsFolder = self._getInputPath('particles')
    pwutils.makePath(inputPartsFolder)

    self.info("Converting set from '%s' into '%s'" % (inputParts.getFileName(), imgStar))

    # The three tables below are created once and reused for every movie
    tableGeneral = Table(columns=[
        'rlnImageSizeX', 'rlnImageSizeY', 'rlnImageSizeZ',
        'rlnMicrographMovieName', 'rlnMicrographBinning',
        'rlnMicrographOriginalPixelSize', 'rlnMicrographDoseRate',
        'rlnMicrographPreExposure', 'rlnVoltage',
        'rlnMicrographStartFrame', 'rlnMotionModelVersion',
        'rlnMicrographGainName', 'rlnMicrographDefectFile'
    ])
    tableShifts = Table(columns=[
        'rlnMicrographFrameNumber', 'rlnMicrographShiftX',
        'rlnMicrographShiftY'
    ])
    tableCoeffs = Table(
        columns=['rlnMotionModelCoeffsIdx', 'rlnMotionModelCoeff'])

    # Create the first row, later only the movieName will be updated
    xdim, ydim, ndim = inputMovies.getDim()
    acq = inputMovies.getAcquisition()
    firstMovie = inputMovies.getFirstItem()
    # The start frame stored in the template row comes from the first movie
    a0, aN = firstMovie.getAlignment().getRange()
    moviesPixelSize = inputMovies.getSamplingRate()
    binningFactor = inputParts.getSamplingRate() / moviesPixelSize

    og = convert.OpticsGroups.fromImages(inputMovies)
    writer = convert.createWriter(optics=og)
    writer.writeSetOfMicrographs(inputMovies, self._getFileName('input_mics'), postprocessImageRow=self._updateMic)

    # 'movieName', the motion model version (0) and the two '""' values are
    # placeholders that are replaced per movie in the loop below
    tableGeneral.addRow(xdim, ydim, ndim, 'movieName', binningFactor, moviesPixelSize, acq.getDosePerFrame(), acq.getDoseInitial(), acq.getVoltage(), a0, 0, '""', '""')
    # Template row: every movie derives its own row from this one
    row = tableGeneral[0]

    for movie in inputMovies:
        movieStar = self._getMovieStar(movie)
        # Optics group 1 is used when the movie carries no group attribute
        ogId = movie.getAttributeValue('_rlnOpticsGroup', 1)
        gainFn = og[ogId].get('rlnMicrographGainName', None)
        defectFn = og[ogId].get('rlnMicrographDefectFile', None)

        with open(movieStar, 'w') as f:
            # A missing attribute yields an empty list, i.e. no local motion
            coeffs = json.loads(
                movie.getAttributeValue('_rlnMotionModelCoeff', '[]'))
            motionMode = 1 if coeffs else 0

            # Update some params in the general table
            replaceDict = {
                'rlnMicrographMovieName': movie.getFileName(),
                'rlnMotionModelVersion': motionMode
            }
            if gainFn:
                replaceDict['rlnMicrographGainName'] = gainFn
            if defectFn:
                replaceDict['rlnMicrographDefectFile'] = defectFn

            # Always start from the template row so that values set for a
            # previous movie are not carried over
            tableGeneral[0] = row._replace(**replaceDict)
            tableGeneral.writeStar(f, tableName='general', singleRow=True)

            # Write shifts
            tableShifts.clearRows()
            alignment = movie.getAlignment()
            shiftsX, shiftsY = alignment.getShifts()
            a0, aN = alignment.getRange()
            # Placeholder shift written for frames outside the aligned range
            empty = -9999.000
            for i in range(1, a0):
                tableShifts.addRow(i, empty, empty)
            # Adjust the shifts to be relative to the first frame
            # so let's add the opposite value
            xoff, yoff = -shiftsX[0], -shiftsY[0]
            for i in range(a0, aN + 1):
                tableShifts.addRow(i, shiftsX[i - a0] + xoff, shiftsY[i - a0] + yoff)
            for i in range(aN + 1, ndim + 1):
                tableShifts.addRow(i, empty, empty)
            tableShifts.writeStar(f, tableName='global_shift')

            # Write coefficients
            tableCoeffs.clearRows()
            if coeffs:
                for i, c in enumerate(coeffs):
                    tableCoeffs.addRow(i, c)
                tableCoeffs.writeStar(f, tableName='local_motion_model')

    convert.writeSetOfParticles(inputParts, imgStar, outputDir=inputPartsFolder, alignType=ALIGN_PROJ, fillMagnification=True)
def convertInputStep(self, movId, partId, postId):
    """Write the per-movie motion STAR files, the list of corrected
    micrographs and the particle STAR file.

    For every input movie a STAR file is written with a 'general' table
    (single row), a 'global_shift' table (one row per frame) and, when the
    first movie of the set has the '_rlnMotionModelCoeff' attribute, a
    'local_motion_model' table.

    The movId, partId and postId parameters are not used in the body.
    """
    inputMovies = self.inputMovies.get()
    inputParts = self.inputParticles.get()
    imgStar = self._getPath('input_particles.star')
    inputPartsFolder = self._getInputPath('particles')
    pwutils.makePath(inputPartsFolder)

    self.info("Converting set from '%s' into '%s'" %
              (inputParts.getFileName(), imgStar))

    tableMovies = Table(
        columns=['rlnMicrographName', 'rlnMicrographMetadata'])
    tableGeneral = Table(columns=[
        'rlnImageSizeX', 'rlnImageSizeY', 'rlnImageSizeZ',
        'rlnMicrographMovieName', 'rlnMicrographBinning',
        'rlnMicrographOriginalPixelSize', 'rlnMicrographDoseRate',
        'rlnMicrographPreExposure', 'rlnVoltage',
        'rlnMicrographStartFrame', 'rlnMotionModelVersion'
    ])
    tableShifts = Table(columns=[
        'rlnMicrographFrameNumber', 'rlnMicrographShiftX',
        'rlnMicrographShiftY'
    ])
    tableCoeffs = Table(
        columns=['rlnMotionModelCoeffsIdx', 'rlnMotionModelCoeff'])

    # Create the first row, later only the movieName will be updated
    xdim, ydim, ndim = inputMovies.getDim()
    acq = inputMovies.getAcquisition()
    firstMovie = inputMovies.getFirstItem()
    # Only the start frame of the first movie goes into the template row
    firstFrame = firstMovie.getAlignment().getRange()[0]
    moviesPixelSize = inputMovies.getSamplingRate()
    binningFactor = inputParts.getSamplingRate() / moviesPixelSize
    # The motion model version is decided once, from the first movie
    hasLocal = firstMovie.hasAttribute('_rlnMotionModelCoeff')
    motionMode = 1 if hasLocal else 0

    tableGeneral.addRow(xdim, ydim, ndim, 'movieName', binningFactor,
                        moviesPixelSize, acq.getDosePerFrame(),
                        acq.getDoseInitial(), acq.getVoltage(),
                        firstFrame, motionMode)
    row = tableGeneral[0]

    for movie in inputMovies:
        movieFn = movie.getFileName()
        movieBase = os.path.basename(movieFn)
        movieStar = self._getInputPath(
            pwutils.replaceBaseExt(movieFn, 'star'))
        tableMovies.addRow(movieBase, movieStar)

        with open(movieStar, 'w') as f:
            # Update Movie name
            tableGeneral[0] = row._replace(rlnMicrographMovieName=movieFn)
            tableGeneral.writeStar(f, tableName='general', singleRow=True)

            # Write shifts
            tableShifts.clearRows()
            alignment = movie.getAlignment()
            shiftsX, shiftsY = alignment.getShifts()
            a0, aN = alignment.getRange()
            # Placeholder shift written for frames outside the aligned range
            empty = -9999.000
            for i in range(1, a0):
                tableShifts.addRow(i, empty, empty)
            # Adjust the shifts to be relative to the first frame
            # so let's add the opposite value
            xoff, yoff = -shiftsX[0], -shiftsY[0]
            for i in range(a0, aN + 1):
                tableShifts.addRow(i, shiftsX[i - a0] + xoff,
                                   shiftsY[i - a0] + yoff)
            for i in range(aN + 1, ndim + 1):
                tableShifts.addRow(i, empty, empty)
            tableShifts.writeStar(f, tableName='global_shift')

            # Write coefficients
            if hasLocal:
                # Default to an empty JSON list: with an empty-string
                # default, json.loads raises for any movie that lacks the
                # attribute even though the first movie has it.
                coeffs = json.loads(
                    movie.getAttributeValue('_rlnMotionModelCoeff', '[]'))
                tableCoeffs.clearRows()
                for i, c in enumerate(coeffs):
                    tableCoeffs.addRow(i, c)
                tableCoeffs.writeStar(f, tableName='local_motion_model')

    with open(self._getPath('input_corrected_micrographs.star'), 'w') as f:
        tableMovies.writeStar(f)

    convert.writeSetOfParticles(inputParts, imgStar,
                                outputDir=inputPartsFolder,
                                alignType=ALIGN_PROJ,
                                fillMagnification=True)
def _suggest_small_box(angpix, box_size):
    """Return the suggested downscaled box size (px) for *box_size*.

    The candidate list and the 3.75 A/px criterion come from the
    relion_it.py script (Sjors H.W. Scheres, Takanori Nakane &
    Colin M. Palmer).
    """
    for box in (48, 64, 96, 128, 160, 192, 256, 288, 300, 320, 360, 384,
                400, 420, 450, 480, 512, 640, 768, 896, 1024):
        # Don't go larger than the original box
        if box > box_size:
            return box_size
        # If Nyquist freq. is better than 7.5 A, use this
        # downscaled box, otherwise continue to next size up
        if angpix * box_size / box < 3.75:
            return box
    # No candidate qualified: keep the original box instead of None
    return box_size


def run_job(args):
    """Run crYOLO prediction on a micrographs STAR file and write the
    output files of the external job.

    Steps: write the crYOLO config, run the prediction command, move the
    coordinate files next to the job directory and list them in
    autopick.star, write job_pipeline.star, register the output node and,
    if output_for_relion.star does not exist yet, write the suggested
    diameter / box sizes plus a .gui_manualpickjob.star file.

    :param args: object with the attributes in_mics, out_dir, threshold,
        box_size, model, filament, denoise, gpu, threads and, when
        filament is set, box_distance and minimum_number_boxes.
    :raises Exception: if the command returns a non-zero code or the
        crYOLO size summary cannot be found / parsed.
    """
    start = time.time()
    in_mics = args.in_mics
    job_dir = args.out_dir
    thresh = args.threshold
    box_size = args.box_size
    distance = 0
    model = args.model
    filament = args.filament
    if filament:
        box_dist = args.box_distance
        min_boxes = args.minimum_number_boxes
    denoise = args.denoise
    gpus = args.gpu
    threads = args.threads

    if SCRATCH_DIR is not None:
        filtered_dir = os.path.join(SCRATCH_DIR, "filtered_tmp")
    else:
        filtered_dir = "%s/filtered_tmp/" % job_dir

    if model == "None":
        model = CRYOLO_GEN_MODEL if not denoise else CRYOLO_GEN_JANNI_MODEL
    else:
        model = os.path.abspath(model)

    # Making a cryolo config file
    json_dict = {
        "model": {
            "architecture": "PhosaurusNet",
            "input_size": 1024,
            "max_box_per_image": 600,
            "filter": [0.1, filtered_dir]
        },
        "other": {
            "log_path": "%s/logs/" % job_dir
        }
    }
    if box_size:  # is not 0
        json_dict["model"]["anchors"] = [int(box_size), int(box_size)]
        if not filament:
            distance = int(box_size / 2)  # use half the box_size
    if denoise:
        json_dict["model"]["filter"] = [
            CRYOLO_JANNI_MODEL, 24, 3, filtered_dir
        ]

    if DEBUG:
        print("Using following config: ", json_dict)

    config_fn = os.path.join(job_dir, "config_cryolo.json")
    with open(config_fn, "w") as json_file:
        json.dump(json_dict, json_file, indent=4)

    # Reading the micrographs star file from Relion
    mictable = Table(fileName=in_mics, tableName='micrographs')
    mic_fns = mictable.getColumnValues("rlnMicrographName")

    # Launching cryolo
    args_dict = {
        '--conf': config_fn,
        '--input': in_mics,
        '--output': os.path.join(job_dir, 'output'),
        '--weights': model,
        '--gpu': gpus.replace(',', ' '),
        '--threshold': thresh,
        '--distance': distance,
        '--cleanup': "",
        '--skip': "",
        '--write_empty': "",
        '--num_cpu': -1 if threads == 1 else threads
    }

    if filament:
        args_dict.update({
            '--filament': "",
            '--box_distance': box_dist,
            '--minimum_number_boxes': min_boxes,
            '--directional_method': 'PREDICTED'
        })
        args_dict.pop('--distance')

    cmd = "%s && %s " % (CONDA_ENV, CRYOLO_PREDICT)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    # NOTE(review): shell=True is needed for the "<env> && <program>" form;
    # the arguments are not quoted, so paths with spaces will break.
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Moving output star files for Relion to use
    table_coords = Table(
        columns=['rlnMicrographName', 'rlnMicrographCoordinates'])
    star_dir = "EMAN_HELIX_SEGMENTED" if filament else "STAR"
    ext = ".box" if filament else ".star"
    autopick_fn = os.path.join(job_dir, "autopick.star")
    with open(autopick_fn, "w") as mics_star:
        for mic in mic_fns:
            mic_base = os.path.basename(mic)
            mic_dir = os.path.dirname(mic)
            dir_parts = mic_dir.split("/")
            if len(dir_parts) > 1 and "job" in dir_parts[1]:
                # remove JobType/jobXXX
                mic_dir = "/".join(dir_parts[2:])
            os.makedirs(os.path.join(job_dir, mic_dir), exist_ok=True)
            mic_root = os.path.splitext(mic_base)[0]
            coord_cryolo = os.path.join(job_dir, "output", star_dir,
                                        mic_root + ext)
            coord_relion = os.path.join(job_dir, mic_dir,
                                        mic_root + "_autopick" + ext)
            # Only micrographs with a crYOLO result are listed
            if os.path.exists(coord_cryolo):
                os.rename(coord_cryolo, coord_relion)
                table_coords.addRow(mic, coord_relion)
                if DEBUG:
                    print("Moved %s to %s" % (coord_cryolo, coord_relion))
        table_coords.writeStar(mics_star, tableName='coordinate_files')

    # Required output to mini pipeline job_pipeline.star file
    pipeline_fn = os.path.join(job_dir, "job_pipeline.star")
    table_gen = Table(columns=['rlnPipeLineJobCounter'])
    table_gen.addRow(2)
    table_proc = Table(columns=[
        'rlnPipeLineProcessName', 'rlnPipeLineProcessAlias',
        'rlnPipeLineProcessTypeLabel', 'rlnPipeLineProcessStatusLabel'
    ])
    table_proc.addRow(job_dir, 'None', 'relion.external', 'Running')
    table_nodes = Table(
        columns=['rlnPipeLineNodeName', 'rlnPipeLineNodeTypeLabel'])
    table_nodes.addRow(in_mics, "MicrographsData.star.relion")
    table_nodes.addRow(autopick_fn, "MicrographsCoords.star.relion.autopick")
    table_input = Table(
        columns=['rlnPipeLineEdgeFromNode', 'rlnPipeLineEdgeProcess'])
    table_input.addRow(in_mics, job_dir)
    table_output = Table(
        columns=['rlnPipeLineEdgeProcess', 'rlnPipeLineEdgeToNode'])
    table_output.addRow(job_dir, autopick_fn)

    with open(pipeline_fn, "w") as f:
        table_gen.writeStar(f, tableName="pipeline_general", singleRow=True)
        table_proc.writeStar(f, tableName="pipeline_processes")
        table_nodes.writeStar(f, tableName="pipeline_nodes")
        table_input.writeStar(f, tableName="pipeline_input_edges")
        table_output.writeStar(f, tableName="pipeline_output_edges")

    # Register output nodes in .Nodes/
    nodes_dir = os.path.join(".Nodes", "MicrographsCoords", job_dir)
    os.makedirs(nodes_dir, exist_ok=True)
    open(os.path.join(nodes_dir, "autopick.star"), "w").close()

    outputFn = os.path.join(job_dir, "output_for_relion.star")
    if not os.path.exists(outputFn):
        # get estimated box size
        summaryfn = os.path.join(job_dir, "output/DISTR",
                                 'size_distribution_summary*.txt')
        summary_files = glob(summaryfn)
        if not summary_files:
            raise Exception("crYOLO size summary not found: %s" % summaryfn)
        estim_sizepx = None
        with open(summary_files[0]) as f:
            for line in f:
                if line.startswith("MEAN,"):
                    estim_sizepx = int(line.split(",")[-1])
                    break
        if estim_sizepx is None:
            raise Exception("No MEAN entry found in %s" % summary_files[0])
        print("\ncrYOLO estimated box size %d px" % estim_sizepx)

        # calculate diameter, original (boxSize) and downsampled (boxSizeSmall) box
        optics = Table(fileName=in_mics, tableName='optics')
        angpix = float(optics[0].rlnMicrographPixelSize)

        if filament:
            # box size = 1.5x tube diam
            diam = 0.66 * box_size
        else:
            # use + 20% for diameter
            diam = math.ceil(estim_sizepx * angpix * 1.2)
        # use +30% for box size, make it even
        boxSize = 1.3 * estim_sizepx
        boxSize = math.ceil(boxSize / 2.) * 2

        boxSizeSmall = _suggest_small_box(angpix, boxSize)

        print(
            "\nSuggested parameters:\n\tDiameter (A): %d\n\tBox size (px): %d\n"
            "\tBox size binned (px): %d" % (diam, boxSize, boxSizeSmall))

        # output all params into a star file
        tableCryolo = Table(columns=[
            'rlnParticleDiameter', 'rlnOriginalImageSize', 'rlnImageSize'
        ])
        tableCryolo.addRow(diam, boxSize, boxSizeSmall)
        with open(outputFn, "w") as f:
            tableCryolo.writeStar(f, tableName='picker')

        # create .gui_manualpickjob.star for easy display
        # One STAR entry per line: the "#" comment lines must not swallow
        # the data blocks that follow them.
        starString = """
# version 30001

data_job

_rlnJobTypeLabel relion.manualpick%s
_rlnJobIsContinue 0
_rlnJobIsTomo 0

# version 30001

data_joboptions_values

loop_
_rlnJobOptionVariable #1
_rlnJobOptionValue #2
angpix %f
black_val 0
blue_value 0
color_label rlnParticleSelectZScore
diameter %d
do_color No
do_fom_threshold No
do_queue No
do_startend No
fn_color ""
fn_in ""
highpass -1
lowpass 20
micscale 0.2
min_dedicated 1
minimum_pick_fom 0
other_args ""
qsub qsub
qsubscript /public/EM/RELION/relion/bin/relion_qsub.csh
queuename openmpi
red_value 2
sigma_contrast 3
white_val 0
"""
        label = ".helical" if filament else ""
        with open(".gui_manualpickjob.star", "w") as f:
            f.write(starString % (label, angpix, diam))

    end = time.time()
    diff = end - start
    print("Job duration = %dh %dmin %dsec \n" %
          (diff // 3600, diff // 60 % 60, diff % 60))
def run_job(project_dir, args):
    """Run cryoassess on the class averages of a job and keep the particles
    that belong to the classes it marks as good.

    The class-average stack is taken from the '_model.star' file matching
    args.in_parts. Good class IDs are parsed from the
    'output/Good/particle_<N>.jpg' files. Particles of those classes are
    written to 'particles_for_training.star' in the job directory, and a
    'backup_selection.star' with one 0/1 row per class is written to the
    project directory.

    :param project_dir: directory that all relative paths are joined to.
    :param args: object with the attributes in_parts, out_dir, batch_size
        and gpu.
    :raises Exception: if the command returns a non-zero code.
    :raises SystemExit: with code 1 when no good class is found (the job
        failure file is created first).
    """
    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    batch = args.batch_size
    gpu = args.gpu

    def getPath(*arglist):
        return os.path.join(project_dir, *arglist)

    # Reading the model star file from relion
    modelstar = in_parts.replace("_data.star", "_model.star")
    refstable = Table(fileName=getPath(modelstar), tableName='model_classes')
    refstack = refstable[0].rlnReferenceImage.split("@")[-1]
    # The index before "@" in the last row is used as the class count
    nrCls = int(refstable[-1].rlnReferenceImage.split("@")[0])

    if DEBUG:
        print("Found input class averages stack: %s" % refstack)

    # Launching cryoassess
    args_dict = {
        '-i': getPath(refstack),
        '-o': getPath(job_dir, 'output'),
        '-b': batch,
        '-m': CRYOASSESS_2D_MODEL,
    }
    cmd = "%s && CUDA_VISIBLE_DEVICES=%s %s " % (CONDA_ENV, gpu, CRYOASSESS_2D)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Parse output to get good classes IDs
    goodTemplate = getPath(job_dir, "output/Good/particle_*.jpg")
    # Raw string: "\d" is not a valid escape in a normal string literal.
    # "\d+" so that an empty number never reaches int().
    regex = re.compile(r'particle_(\d+)\.jpg')
    goodcls = []
    for fn in glob(goodTemplate):
        match = regex.search(fn)
        if match is not None:  # ignore files without a numeric class ID
            goodcls.append(int(match.group(1)))

    if DEBUG:
        print("Parsing output files: %s\nGood classes: %s" % (goodTemplate, goodcls))

    if not goodcls:
        print("No good classes found. Job stopped.")
        end = time.time()
        diff = end - start
        print("Job duration = %dh %dmin %dsec \n" %
              (diff // 3600, diff // 60 % 60, diff % 60))
        open(RELION_JOB_FAILURE_FILENAME, "w").close()
        # Same effect as exit(1) without depending on the site builtin
        raise SystemExit(1)

    # A set makes the per-particle and per-class membership tests O(1)
    good_set = set(goodcls)

    # Create output star file for Relion to use
    optics = Table(fileName=getPath(in_parts), tableName='optics')
    ptcls = Table(fileName=getPath(in_parts), tableName='particles')
    cols = ptcls.getColumnNames()
    out_ptcls = Table(columns=cols)

    for row in ptcls:
        if row.rlnClassNumber in good_set:
            out_ptcls.addRow(*row)

    if DEBUG:
        print("Input particles: %d\nOutput particles: %d" %
              (len(ptcls), len(out_ptcls)))
    out_star = getPath(job_dir, "particles_for_training.star")
    with open(out_star, "w") as f:
        optics.writeStar(f, tableName="optics")
        out_ptcls.writeStar(f, tableName="particles")

    # Create backup_selection.star for results visualization
    sel = Table(columns=['rlnSelected'])
    for i in range(1, nrCls + 1):
        sel.addRow(1 if i in good_set else 0)
    with open(getPath("backup_selection.star"), "w") as f:
        sel.writeStar(f, tableName="")

    end = time.time()
    diff = end - start
    print("Job duration = %dh %dmin %dsec \n" %
          (diff // 3600, diff // 60 % 60, diff % 60))
def run_job(project_dir, args):
    """Fine-tune a crYOLO model on the particles of a RELION STAR file.

    Steps: create the image / annotation folders, derive the unbinned box
    size from the optics table, write config_cryolo.json, link every
    micrograph and write one .box file per micrograph, run the training
    command and write job_pipeline.star.

    :param project_dir: directory that all relative paths are joined to.
    :param args: object with the attributes in_parts, out_dir, model and
        gpu.
    :raises Exception: if the training command returns a non-zero code.
    """
    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    model = args.model or CRYOLO_GEN_MODEL
    gpus = args.gpu

    def getPath(*arglist):
        return os.path.join(project_dir, *arglist)

    if SCRATCH_DIR is not None:
        filtered_dir = os.path.join(SCRATCH_DIR, "filtered_tmp")
    else:
        filtered_dir = "%s/filtered_tmp/" % job_dir

    # Create folder structure for cryolo
    # NOTE(review): created relative to the current directory, while the
    # links below go under getPath(job_dir, ...) - this presumably relies
    # on the caller running inside the job directory; TODO confirm.
    # exist_ok so that re-running the job does not fail here.
    os.makedirs(IMG_FOLDER, exist_ok=True)
    os.makedirs(ANNOT_FOLDER, exist_ok=True)

    # Reading the box size from relion
    optics = Table(fileName=getPath(in_parts), tableName='optics')[0]
    box_bin = int(optics.rlnImageSize)
    # Plain division plus rounding: float floor division ("//") can round
    # an exact ratio down by one and discards non-integer binning.
    binning = (float(optics.rlnImagePixelSize) /
               float(optics.rlnMicrographOriginalPixelSize))
    box_size = int(round(binning * box_bin))
    print("Using unbinned box size of %d px" % box_size)

    # Making a cryolo config file
    json_dict = {
        "model": {
            "architecture": "PhosaurusNet",
            "input_size": 1024,
            "max_box_per_image": 600,
            "anchors": [box_size, box_size],
            "filter": [
                0.1,
                filtered_dir
            ]
        },
        "train": {
            "train_image_folder": IMG_FOLDER,
            "train_annot_folder": ANNOT_FOLDER,
            "train_times": 10,
            "batch_size": 6,
            "learning_rate": 0.0001,
            "nb_epoch": 200,
            "object_scale": 5.0,
            "no_object_scale": 1.0,
            "coord_scale": 1.0,
            "class_scale": 1.0,
            "pretrained_weights": "%s" % model,
            "saved_weights_name": getPath(job_dir, TUNE_MODEL),
            "debug": True
        },
        "valid": {
            "valid_image_folder": "",
            "valid_annot_folder": "",
            "valid_times": 1
        }
    }

    if DEBUG:
        print("Using following config: ", json_dict)

    with open("config_cryolo.json", "w") as json_file:
        json.dump(json_dict, json_file, indent=4)

    # Reading the particles from relion
    try:
        parttable = Table(fileName=getPath(in_parts), tableName='particles')
    except Exception as err:
        # Stop quietly as before, but do not hide KeyboardInterrupt /
        # SystemExit and do report the reason.
        print("Could not read particles table from %s. Stopping" % in_parts)
        print("Reason: %s" % err)
        return

    # Arranging files for cryolo: making symlinks for mics and creating box files
    mics_dict = {}
    half_box = box_size / 2
    for row in parttable:
        # Box files store the corner of the box, STAR files the center
        xCoord = int(int(row.rlnCoordinateX) - half_box)
        yCoord = int(int(row.rlnCoordinateY) - half_box)
        mics_dict.setdefault(row.rlnMicrographName, []).append((xCoord, yCoord))

    for mic, coords_list in mics_dict.items():
        micSrc = getPath(mic)
        micDst = getPath(job_dir, IMG_FOLDER, os.path.basename(mic))
        if not os.path.exists(micDst):
            os.symlink(micSrc, micDst)
            if DEBUG:
                print("Link %s --> %s" % (micSrc, micDst))

        box = os.path.splitext(micDst)[0] + ".box"
        box = box.replace(IMG_FOLDER, ANNOT_FOLDER)
        with open(box, "w") as f:
            for x, y in coords_list:
                f.write("%s\t%s\t%s\t%s\n" % (x, y, box_size, box_size))
        if DEBUG:
            print("Created box file: %s" % box)

    # Launching cryolo
    args_dict = {
        '--conf': "config_cryolo.json",
        '--gpu': gpus.replace(',', ' '),
        '--warmup': 0,
        '--fine_tune': "",
        '--cleanup': ""
    }
    cmd = "%s && %s " % (CONDA_ENV, CRYOLO_TRAIN)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Required output job_pipeline.star file
    pipeline_fn = getPath(job_dir, "job_pipeline.star")
    table_gen = Table(columns=['rlnPipeLineJobCounter'])
    table_gen.addRow(2)
    table_proc = Table(columns=['rlnPipeLineProcessName',
                                'rlnPipeLineProcessAlias',
                                'rlnPipeLineProcessTypeLabel',
                                'rlnPipeLineProcessStatusLabel'])
    table_proc.addRow(job_dir, 'None', 'relion.external', 'Running')
    table_nodes = Table(columns=['rlnPipeLineNodeName',
                                 'rlnPipeLineNodeTypeLabel'])
    table_nodes.addRow(in_parts, "ParticlesData.star.relion")
    table_input = Table(columns=['rlnPipeLineEdgeFromNode',
                                 'rlnPipeLineEdgeProcess'])
    table_input.addRow(in_parts, job_dir)

    with open(pipeline_fn, "w") as f:
        table_gen.writeStar(f, tableName="pipeline_general", singleRow=True)
        table_proc.writeStar(f, tableName="pipeline_processes")
        table_nodes.writeStar(f, tableName="pipeline_nodes")
        table_input.writeStar(f, tableName="pipeline_input_edges")

    end = time.time()
    diff = end - start
    print("Job duration = %dh %dmin %dsec \n" %
          (diff // 3600, diff // 60 % 60, diff % 60))
def run_job(project_dir, args):
    """Run cinderella on the class averages of a job and keep the particles
    that belong to the classes scoring above the threshold.

    The class-average stack is taken from the '_model.star' file matching
    args.in_parts. Good class IDs are parsed from the
    '<stack>_index_confidence.txt' file in the job's output folder.
    Particles of those classes are written to
    'particles_for_training.star' in the job directory, and a
    'backup_selection.star' with one 0/1 row per class is written to the
    project directory.

    :param project_dir: directory that all relative paths are joined to.
    :param args: object with the attributes in_parts, out_dir, threshold,
        model and gpu.
    :raises Exception: if the command returns a non-zero code.
    :raises SystemExit: with code 1 when no good class is found (the job
        failure file is created first).
    """
    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    thresh = args.threshold
    model = args.model
    gpus = args.gpu

    def getPath(*arglist):
        return os.path.join(project_dir, *arglist)

    if model == "None":
        model = CINDERELLA_GEN_MODEL
    else:
        model = getPath(model)

    # Reading the model star file from relion
    modelstar = in_parts.replace("_data.star", "_model.star")
    refstable = Table(fileName=getPath(modelstar), tableName='model_classes')
    refstack = refstable[0].rlnReferenceImage.split("@")[-1]
    # The index before "@" in the last row is used as the class count
    nrCls = int(refstable[-1].rlnReferenceImage.split("@")[0])

    if DEBUG:
        print("Found input class averages stack: %s" % refstack)

    # Launching cinderella
    # The output folder is passed as the same path the results are read
    # from below, so the job does not depend on the current directory.
    output_dir = getPath(job_dir, "output")
    args_dict = {
        '-i': getPath(refstack),
        '-o': output_dir,
        '-w': model,
        '--gpu': gpus,
        '-t': thresh,
    }
    cmd = "%s && %s " % (CONDA_ENV, CINDERELLA_PREDICT)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Parse output to get good classes IDs
    outfn = os.path.basename(refstack.replace(".mrcs", "_index_confidence.txt"))
    outpath = os.path.join(output_dir, outfn)
    goodcls = []
    with open(outpath, "r") as f:
        for line in f:
            fields = line.split()
            if len(fields) < 2:
                continue  # blank or incomplete line
            if float(fields[1]) > thresh:
                # the file index is 0-based, class numbers start at 1
                goodcls.append(int(fields[0]) + 1)
            else:
                # stops at the first entry not above the threshold
                # (presumably the file is sorted by confidence - TODO confirm)
                break

    if DEBUG:
        print("Parsing output file: %s\nGood classes: %s" % (outpath, goodcls))

    if not goodcls:
        print("No good classes found. Job stopped.")
        end = time.time()
        diff = end - start
        print("Job duration = %dh %dmin %dsec \n" %
              (diff // 3600, diff // 60 % 60, diff % 60))
        open(RELION_JOB_FAILURE_FILENAME, "w").close()
        # Same effect as exit(1) without depending on the site builtin
        raise SystemExit(1)

    # A set makes the per-particle and per-class membership tests O(1)
    good_set = set(goodcls)

    # Create output star file for Relion to use
    optics = Table(fileName=getPath(in_parts), tableName='optics')
    ptcls = Table(fileName=getPath(in_parts), tableName='particles')
    cols = ptcls.getColumnNames()
    out_ptcls = Table(columns=cols)

    for row in ptcls:
        if row.rlnClassNumber in good_set:
            out_ptcls.addRow(*row)

    if DEBUG:
        print("Input particles: %d\nOutput particles: %d" %
              (len(ptcls), len(out_ptcls)))
    out_star = getPath(job_dir, "particles_for_training.star")
    with open(out_star, "w") as f:
        optics.writeStar(f, tableName="optics")
        out_ptcls.writeStar(f, tableName="particles")

    # Create backup_selection.star for results visualization
    sel = Table(columns=['rlnSelected'])
    for i in range(1, nrCls + 1):
        sel.addRow(1 if i in good_set else 0)
    with open(getPath("backup_selection.star"), "w") as f:
        sel.writeStar(f, tableName="")

    end = time.time()
    diff = end - start
    print("Job duration = %dh %dmin %dsec \n" %
          (diff // 3600, diff // 60 % 60, diff % 60))