def test_removeColumns(self):
    """Check that removeColumns drops exactly the requested columns.

    The 'particles' table of the refine3d data STAR file is read, its
    column names are compared against the expected list, a subset is
    removed and the remaining names are compared again.
    """
    print("Checking removeColumns...")
    starPath = testfile('star', 'refine3d', 'run_it016_data.star')
    particles = Table(fileName=starPath, tableName='particles')

    allColumns = [
        'rlnCoordinateX',
        'rlnCoordinateY',
        'rlnAutopickFigureOfMerit',
        'rlnClassNumber',
        'rlnAnglePsi',
        'rlnImageName',
        'rlnMicrographName',
        'rlnOpticsGroup',
        'rlnCtfMaxResolution',
        'rlnCtfFigureOfMerit',
        'rlnDefocusU',
        'rlnDefocusV',
        'rlnDefocusAngle',
        'rlnCtfBfactor',
        'rlnCtfScalefactor',
        'rlnPhaseShift',
        'rlnGroupNumber',
        'rlnAngleRot',
        'rlnAngleTilt',
        'rlnOriginXAngst',
        'rlnOriginYAngst',
        'rlnNormCorrection',
        'rlnLogLikeliContribution',
        'rlnMaxValueProbDistribution',
        'rlnNrOfSignificantSamples',
        'rlnRandomSubset',
    ]

    dropped = [
        'rlnOriginXAngst',
        'rlnOriginYAngst',
        'rlnNormCorrection',
        'rlnAnglePsi',
        'rlnMaxValueProbDistribution',
    ]

    # The file must provide every expected column, in this exact order
    self.assertEqual(allColumns, particles.getColumnNames())
    # Same check through the hasAllColumns method
    self.assertTrue(particles.hasAllColumns(allColumns))

    particles.removeColumns(dropped)

    # The surviving columns keep their relative order
    remaining = [name for name in allColumns if name not in dropped]
    self.assertEqual(remaining, particles.getColumnNames())
    # None of the removed columns may be reported anymore
    self.assertFalse(particles.hasAnyColumn(dropped))
def test_addColumns(self):
    """Check that addColumns appends the requested columns and values.

    Starting from the two columns found in the 'sampling_directions'
    table, four columns are added with 'name=value' expressions. The test
    then verifies the resulting column names, that the two extra angle
    columns match 'rlnAngleRot' row by row, and that the constant column
    holds 1000 everywhere. The table is finally written to a temporary
    STAR file.
    """
    tmpOutput = '/tmp/sampling.star'
    print("Checking addColumns to %s..." % tmpOutput)

    starPath = testfile('star', 'refine3d', 'run_it016_sampling.star')
    sampling = Table(fileName=starPath, tableName='sampling_directions')

    allColumns = ['rlnAngleRot', 'rlnAngleTilt', 'rlnAnglePsi',
                  'rlnExtraAngle1', 'rlnExtraAngle2', 'rlnAnotherConst']

    # Before adding anything only the first two columns are present
    self.assertEqual(allColumns[:2], sampling.getColumnNames())

    sampling.addColumns('rlnAnglePsi=0.0',
                        'rlnExtraAngle1=rlnAngleRot',
                        'rlnExtraAngle2=rlnExtraAngle1',
                        'rlnAnotherConst=1000')

    self.assertEqual(allColumns, sampling.getColumnNames())

    # Both extra angle columns must carry the rlnAngleRot values
    rotValues = sampling.getColumnValues('rlnAngleRot')
    extra1Values = sampling.getColumnValues('rlnExtraAngle1')
    extra2Values = sampling.getColumnValues('rlnExtraAngle2')
    for rot, extra1, extra2 in zip(rotValues, extra1Values, extra2Values):
        self.assertAlmostEqual(rot, extra1)
        self.assertAlmostEqual(rot, extra2)

    constValues = sampling.getColumnValues('rlnAnotherConst')
    self.assertTrue(all(value == 1000 for value in constValues))

    sampling.write(tmpOutput, tableName='sampling_directions')
def createFinalFilesStep(self):
    """Assemble the final model metadata and the final particles STAR file.

    For every run level returned by ``_getRLevList``:

    * the first row of its 'model_classes' metadata is taken, the map it
      references is copied to the path given by ``_getMapById`` and the
      row, now pointing at that copy, is appended to the final model
      metadata; the original map path is registered in ``_mapsDict``;
    * all of its particles are appended to the output table with
      ``rlnClassNumber`` replaced by the run level.
    """
    # Destination and container for the metadata of all final models
    finalModelFn = self._getFileName('finalModel')
    allModelsMd = self._getMetadata()

    # Destination of the STAR file with all final particles
    finalDataFn = self._getFileName('finalData')

    rawDataFn = self._getFileName('rawFinalData')
    print("FN: ", rawDataFn)
    # The raw final data is read only to learn the particle column names
    rawTable = Table(fileName=rawDataFn, tableName='particles')
    columnNames = [str(name) for name in rawTable.getColumnNames()]
    allParticles = Table(columns=columnNames, tableName='particles')

    for rLev in self._getRLevList():
        lastIt = self._lastIter(rLev)
        modelFn = self._getFileName('model', iter=lastIt,
                                    lev=self._level, rLev=rLev)
        classesMd = self._getMetadata('model_classes@' + modelFn)

        refLabel = md.RLN_MLMODEL_REF_IMAGE
        firstRow = md.getFirstRow(classesMd)
        origMap = firstRow.getValue(refLabel)

        # Copy the map and make the row reference the copy
        mapId = self._getRunLevId(rLev=rLev)
        copiedMap = self._getMapById(mapId)
        firstRow.setValue(refLabel, copiedMap)
        copyFile(origMap, copiedMap)
        self._mapsDict[origMap] = mapId

        firstRow.addToMd(allModelsMd)

        dataFn = self._getFileName('data', iter=lastIt,
                                   lev=self._level, rLev=rLev)

        # Every particle of this run level gets the run level as class
        reader = Table()
        for particle in reader.iterRows(dataFn, tableName='particles'):
            relabeled = particle._replace(rlnClassNumber=rLev)
            allParticles.addRow(*relabeled)

    self.writeStar(finalDataFn, allParticles)
    allModelsMd.write('model_classes@' + finalModelFn)
def mergeClassesStep(self):
    """Produce the final per-class input STAR files, or copy the raw ones.

    When ``self.doGrouping`` is true the final maps are clustered and each
    particle of the raw final data STAR file is relabeled with
    ``labels[oldClass - 1] + 1``. One 'input_star' file is written per
    resulting class, together with the optics table read from the level-1
    input STAR file.

    Otherwise the raw final data and model files are simply copied to
    their final names.
    """
    if self.doGrouping:
        from cryomethods.functions import NumpyImgHandler
        npIh = NumpyImgHandler()
        makePath(self._getLevelPath(self._level))
        listVol = self._getFinalMaps()
        matrix = npIh.getAllNpList(listVol, 2)
        labels = self._clusteringData(matrix)

        prevStar = self._getFileName('rawFinalData')
        origStar = self._getFileName('input_star', lev=1, rLev=1)
        opticsTable = Table(fileName=origStar, tableName='optics')
        print("OPTABLE: ", origStar, opticsTable.size())

        # Column names are the same for every output table: read them once
        tableIn = Table(fileName=prevStar, tableName='particles')
        cols = [str(c) for c in tableIn.getColumnNames()]

        # Rows arrive sorted by their ORIGINAL class, but several original
        # classes can map to the same new class without being adjacent.
        # Collect rows per new class so each output file is written once
        # and contains every particle of that class.
        tablesByClass = {}
        pTable = Table()
        for row in pTable.iterRows(prevStar, key="rlnClassNumber",
                                   tableName='particles'):
            newClass = labels[row.rlnClassNumber - 1] + 1
            ouTable = tablesByClass.get(newClass)
            if ouTable is None:
                ouTable = Table(columns=cols, tableName='particles')
                tablesByClass[newClass] = ouTable
            ouTable.addRow(*row._replace(rlnClassNumber=newClass))

        # Nothing is written when the raw final data holds no particles
        for newClass, ouTable in tablesByClass.items():
            fn = self._getFileName('input_star', lev=self._level,
                                   rLev=newClass)
            print("mergeClassesStep ouTable.size: ", ouTable.size())
            self.writeStar(fn, ouTable, opticsTable)
    else:
        prevData = self._getFileName('rawFinalData')
        finalData = self._getFileName('finalData')
        prevModel = self._getFileName('rawFinalModel')
        finalModel = self._getFileName('finalModel')
        copyFile(prevData, finalData)
        copyFile(prevModel, finalModel)
def _mergeDataStar(self, rLev, callback):
    """Distribute the particles of one run level between two STAR files.

    Each particle of the last-iteration data file of ``rLev`` is assigned
    a map id, looked up in ``self._mapsDict`` with the key returned by
    ``callback(iters, rLev, classNumber)``. Particles whose map id is
    flagged in ``self.stopDict`` are appended to the raw final data file;
    the others are appended to the output data file of the current level.
    Rows already stored in either file are preserved.

    :param rLev: run level whose particles are processed.
    :param callback: callable ``(iters, rLev, classNumber)`` returning the
        key used to look up the map id in ``self._mapsDict``.
    :raises RuntimeError: if a particle's class has no registered map and
        no other class of this run level has one either.
    """
    def _getMapId(rMap):
        # None signals that no map is registered under this key
        try:
            return self._mapsDict[rMap]
        except KeyError:
            return None

    iters = self._lastIter(rLev)
    # STAR file with all particles that continue to the next level
    outData = self._getFileName('outputData', lev=self._level)
    # STAR file with all final particles
    finalData = self._getFileName('rawFinalData')
    imgStar = self._getFileName('data', iter=iters,
                                lev=self._level, rLev=rLev)
    # The keyword must be 'fileName' (as in every other Table call here);
    # with the misspelled 'filename' the optics table was never read.
    opTable = Table(fileName=imgStar, tableName='optics')
    tableIn = Table(fileName=imgStar, tableName='particles')
    print("IMGSTAR: ", imgStar, "PARTS: ", tableIn.size())
    cols = [str(c) for c in tableIn.getColumnNames()]
    outTable = Table(columns=cols, tableName='particles')
    finalTable = Table(columns=cols, tableName='particles')

    # Keep whatever earlier calls already stored in both files
    if os.path.exists(outData):
        print("Exists ", outData)
        tmpTable = Table()
        for row in tmpTable.iterRows(outData, tableName='particles'):
            outTable.addRow(*row)

    if os.path.exists(finalData):
        print("Exists ", finalData)
        tpTable = Table()
        for row in tpTable.iterRows(finalData, tableName='particles'):
            finalTable.addRow(*row)

    pTable = Table()
    for row in pTable.iterRows(imgStar, key="rlnClassNumber",
                               tableName='particles'):
        clsPart = row.rlnClassNumber
        mapId = _getMapId(callback(iters, rLev, clsPart))

        if mapId is None:
            # Fall back to the first class of this run level that does
            # have a registered map. A single pass is enough: retrying
            # the same lookups forever would only hang the protocol.
            for clsPart in range(1, self.numberOfClasses.get() + 1):
                mapId = _getMapId(callback(iters, rLev, clsPart))
                if mapId is not None:
                    break
            else:
                raise RuntimeError("No map registered for any class of "
                                   "run level %s" % rLev)

        if self.stopDict[mapId]:
            classId = self._clsIdDict[mapId]
            finalTable.addRow(*row._replace(rlnClassNumber=classId))
        else:
            # The class is the second dot-separated field of the map id
            classId = int(mapId.split('.')[1])
            outTable.addRow(*row._replace(rlnClassNumber=classId))

    if finalTable.size() != 0:
        print("finalTable.size: ", finalTable.size())
        self.writeStar(finalData, finalTable)

    if outTable.size() != 0:
        print("outTable.size: ", outTable.size())
        self.writeStar(outData, outTable, opTable)
def run_job(project_dir, args):
    """Keep only the particles of the classes cryoassess rates as good.

    The class averages stack is taken from the ``*_model.star`` file
    matching ``args.in_parts``. cryoassess is run on it and the good class
    numbers are parsed from the ``output/Good/particle_<N>.jpg`` files
    found in the job directory. Particles of those classes are written to
    ``particles_for_training.star`` in the job directory, and a
    ``backup_selection.star`` with one 0/1 flag per class is written in
    ``project_dir``.

    :param project_dir: root directory against which all relative paths
        are resolved.
    :param args: parsed arguments providing ``in_parts``, ``out_dir``,
        ``batch_size`` and ``gpu``.
    :raises Exception: if the cryoassess command exits with a non-zero
        return code.
    :raises SystemExit: with code 1, after creating the Relion failure
        file, when no good class is found.
    """
    start = time.time()

    def _printDuration():
        # Wall-clock time elapsed since the job started
        diff = time.time() - start
        print("Job duration = %dh %dmin %dsec \n" % (
            diff // 3600, diff // 60 % 60, diff % 60))

    def getPath(*arglist):
        return os.path.join(project_dir, *arglist)

    in_parts = args.in_parts
    job_dir = args.out_dir
    batch = args.batch_size
    gpu = args.gpu

    # Reading the model star file from relion
    modelstar = in_parts.replace("_data.star", "_model.star")
    refstable = Table(fileName=getPath(modelstar), tableName='model_classes')
    # Reference images look like '<index>@<stack>'
    refstack = refstable[0].rlnReferenceImage.split("@")[-1]
    nrCls = int(refstable[-1].rlnReferenceImage.split("@")[0])

    if DEBUG:
        print("Found input class averages stack: %s" % refstack)

    # Launching cryoassess
    args_dict = {
        '-i': getPath(refstack),
        '-o': getPath(job_dir, 'output'),
        '-b': batch,
        '-m': CRYOASSESS_2D_MODEL,
    }
    cmd = "%s && CUDA_VISIBLE_DEVICES=%s %s " % (CONDA_ENV, gpu,
                                                 CRYOASSESS_2D)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    # The command chains the environment activation with '&&', so it has
    # to go through the shell.
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d"
                        % proc.returncode)

    # Parse output to get good classes IDs
    goodTemplate = getPath(job_dir, "output/Good/particle_*.jpg")
    # Raw string avoids the invalid '\d' escape; '+' guarantees that at
    # least one digit is captured so int() cannot receive ''.
    regex = re.compile(r'particle_(\d+)\.jpg')
    goodcls = []
    for jpgFn in glob(goodTemplate):
        match = regex.search(os.path.basename(jpgFn))
        # Files matched by the glob but lacking a number are ignored
        if match is not None:
            goodcls.append(int(match.group(1)))

    if DEBUG:
        print("Parsing output files: %s\nGood classes: %s"
              % (goodTemplate, goodcls))

    if not goodcls:
        print("No good classes found. Job stopped.")
        _printDuration()
        open(RELION_JOB_FAILURE_FILENAME, "w").close()
        raise SystemExit(1)

    # Set membership keeps the per-particle test O(1)
    goodSet = set(goodcls)

    # Create output star file for Relion to use
    optics = Table(fileName=getPath(in_parts), tableName='optics')
    ptcls = Table(fileName=getPath(in_parts), tableName='particles')
    cols = ptcls.getColumnNames()
    out_ptcls = Table(columns=cols)

    for row in ptcls:
        if row.rlnClassNumber in goodSet:
            out_ptcls.addRow(*row)

    if DEBUG:
        print("Input particles: %d\nOutput particles: %d"
              % (len(ptcls), len(out_ptcls)))

    out_star = getPath(job_dir, "particles_for_training.star")
    with open(out_star, "w") as f:
        optics.writeStar(f, tableName="optics")
        out_ptcls.writeStar(f, tableName="particles")

    # Create backup_selection.star for results visualization
    sel = Table(columns=['rlnSelected'])
    for i in range(1, nrCls + 1):
        sel.addRow(1 if i in goodSet else 0)
    with open(getPath("backup_selection.star"), "w") as f:
        sel.writeStar(f, tableName="")

    _printDuration()
def convertInputStep(self, resetDeps, copyAlignment):
    """Create the input files in STAR format as expected by Relion.

    * Level 0: the input is converted and then successively subsampled at
      random, writing one STAR file for each run level from 9 down to 2.
      Each subset is drawn from the previous one.
    * Level 1: the input particles and volumes are converted.
    * Any other level: the output data of the previous level is split by
      ``rlnClassNumber`` into one input STAR file per class, each written
      with the optics table of the level-1 input STAR file.

    :param resetDeps: not used by this method.
    :param copyAlignment: forwarded to ``_convertStar``.
    """
    import random

    if self._level == 0:
        makePath(self._getRunPath(self._level, 1))
        imgStar = self._getFileName('input_star', lev=self._level, rLev=0)
        self._convertStar(copyAlignment, imgStar)
        opticsTable = Table(fileName=imgStar, tableName='optics')
        partsTable = Table(fileName=imgStar, tableName='particles')
        self._convertVol(ImageHandler(), self.inputVolumes.get())
        mdSize = partsTable.size()

        for i in range(9, 1, -1):
            makePath(self._getRunPath(self._level, i))
            mStar = self._getFileName('input_star', lev=self._level, rLev=i)
            # 10000 particles per step for big sets, 10% per step otherwise
            size = 10000 * i if mdSize >= 100000 else int(mdSize * 0.1 * i)
            print("partsTable: ", size, i, mdSize)
            # The table is shrunk in place, so each subset is nested in
            # the one written just before it
            partsTable._rows = random.sample(partsTable._rows, k=size)
            self.writeStar(mStar, partsTable, opticsTable)

    elif self._level == 1:
        imgStar = self._getFileName('input_star', lev=self._level, rLev=1)
        makePath(self._getRunPath(self._level, 1))
        self._convertStar(copyAlignment, imgStar)
        # TODO: find a clever way to avoid volume conversion if it is
        # already done.
        self._convertVol(ImageHandler(), self.inputVolumes.get())
    else:
        lastCls = None
        prevStar = self._getFileName('outputData', lev=self._level - 1)
        firstStarFn = self._getFileName('input_star', lev=1, rLev=1)
        opTable = Table(fileName=firstStarFn, tableName='optics')
        tableIn = Table(fileName=prevStar, tableName='particles')
        cols = [str(c) for c in tableIn.getColumnNames()]

        pTable = Table()
        for row in pTable.iterRows(prevStar, key="rlnClassNumber",
                                   tableName='particles'):
            clsPart = row.rlnClassNumber
            if clsPart != lastCls:
                # A new class starts: flush the table of the previous one
                if lastCls is not None:
                    print("writing %s" % fn)
                    self.writeStar(fn, newPTable, opTable)
                paths = self._getRunPath(self._level, clsPart)
                makePath(paths)
                print("Path: %s and newRlev: %d" % (paths, clsPart))
                lastCls = clsPart
                newPTable = Table(columns=cols, tableName='particles')
                fn = self._getFileName('input_star', lev=self._level,
                                       rLev=clsPart)
            newPTable.addRow(*row)

        # Flush the last class; nothing to write if there were no rows
        if lastCls is not None:
            print("writing %s and ending the loop" % fn)
            self.writeStar(fn, newPTable, opTable)
def run_job(project_dir, args):
    """Keep only the particles of the classes cinderella rates as good.

    The class averages stack is taken from the ``*_model.star`` file
    matching ``args.in_parts``. cinderella is run on it and the good
    classes are read from the ``*_index_confidence.txt`` file it produces.
    Particles of those classes are written to
    ``particles_for_training.star`` in the job directory, and a
    ``backup_selection.star`` with one 0/1 flag per class is written in
    ``project_dir``.

    :param project_dir: root directory against which all relative paths
        are resolved.
    :param args: parsed arguments providing ``in_parts``, ``out_dir``,
        ``threshold``, ``model`` and ``gpu``.
    :raises Exception: if the cinderella command exits with a non-zero
        return code.
    :raises SystemExit: with code 1, after creating the Relion failure
        file, when no good class is found.
    """
    start = time.time()

    def _printDuration():
        # Wall-clock time elapsed since the job started
        diff = time.time() - start
        print("Job duration = %dh %dmin %dsec \n" % (
            diff // 3600, diff // 60 % 60, diff % 60))

    def getPath(*arglist):
        return os.path.join(project_dir, *arglist)

    in_parts = args.in_parts
    job_dir = args.out_dir
    thresh = args.threshold
    model = args.model
    gpus = args.gpu

    # The literal string "None" selects the general model
    if model == "None":
        model = CINDERELLA_GEN_MODEL
    else:
        model = getPath(model)

    # Reading the model star file from relion
    modelstar = in_parts.replace("_data.star", "_model.star")
    refstable = Table(fileName=getPath(modelstar), tableName='model_classes')
    # Reference images look like '<index>@<stack>'
    refstack = refstable[0].rlnReferenceImage.split("@")[-1]
    nrCls = int(refstable[-1].rlnReferenceImage.split("@")[0])

    if DEBUG:
        print("Found input class averages stack: %s" % refstack)

    # Launching cinderella
    # NOTE(review): '-o' is relative while the result is read from
    # <project_dir>/<job_dir>/output below; this presumably relies on the
    # caller having changed into the job directory - confirm.
    args_dict = {
        '-i': getPath(refstack),
        '-o': 'output',
        '-w': model,
        '--gpu': gpus,
        '-t': thresh,
    }
    cmd = "%s && %s " % (CONDA_ENV, CINDERELLA_PREDICT)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    # The command chains the environment activation with '&&', so it has
    # to go through the shell.
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d"
                        % proc.returncode)

    # Parse output to get good classes IDs
    outfn = os.path.basename(
        refstack.replace(".mrcs", "_index_confidence.txt"))
    outpath = getPath(job_dir, "output", outfn)
    goodcls = []
    with open(outpath, "r") as f:
        for line in f:
            fields = line.split()
            if len(fields) < 2:
                # Blank or truncated line: nothing to parse
                continue
            if float(fields[1]) > thresh:
                # Stored index + 1 is compared against rlnClassNumber
                goodcls.append(int(fields[0]) + 1)
            else:
                # Parsing stops at the first entry not above the threshold
                # (presumably the file is sorted by confidence - confirm)
                break

    if DEBUG:
        print("Parsing output file: %s\nGood classes: %s"
              % (outpath, goodcls))

    if not goodcls:
        print("No good classes found. Job stopped.")
        _printDuration()
        open(RELION_JOB_FAILURE_FILENAME, "w").close()
        raise SystemExit(1)

    # Set membership keeps the per-particle test O(1)
    goodSet = set(goodcls)

    # Create output star file for Relion to use
    optics = Table(fileName=getPath(in_parts), tableName='optics')
    ptcls = Table(fileName=getPath(in_parts), tableName='particles')
    cols = ptcls.getColumnNames()
    out_ptcls = Table(columns=cols)

    for row in ptcls:
        if row.rlnClassNumber in goodSet:
            out_ptcls.addRow(*row)

    if DEBUG:
        print("Input particles: %d\nOutput particles: %d"
              % (len(ptcls), len(out_ptcls)))

    out_star = getPath(job_dir, "particles_for_training.star")
    with open(out_star, "w") as f:
        optics.writeStar(f, tableName="optics")
        out_ptcls.writeStar(f, tableName="particles")

    # Create backup_selection.star for results visualization
    sel = Table(columns=['rlnSelected'])
    for i in range(1, nrCls + 1):
        sel.addRow(1 if i in goodSet else 0)
    with open(getPath("backup_selection.star"), "w") as f:
        sel.writeStar(f, tableName="")

    _printDuration()