Beispiel #1
0
    def test_removeColumns(self):
        """ Load the 'particles' table, drop a few columns and verify the rest.

        The column names are checked right after loading (exact order and
        through ``hasAllColumns``) and again after ``removeColumns`` (exact
        order of the surviving names and ``hasAnyColumn`` being false for the
        dropped ones).
        """
        print("Checking removeColumns...")
        starPath = testfile('star', 'refine3d', 'run_it016_data.star')
        particles = Table(fileName=starPath, tableName='particles')

        # Full column list, in the order expected from the star file
        allColumns = [
            'rlnCoordinateX', 'rlnCoordinateY',
            'rlnAutopickFigureOfMerit', 'rlnClassNumber',
            'rlnAnglePsi', 'rlnImageName',
            'rlnMicrographName', 'rlnOpticsGroup',
            'rlnCtfMaxResolution', 'rlnCtfFigureOfMerit',
            'rlnDefocusU', 'rlnDefocusV',
            'rlnDefocusAngle', 'rlnCtfBfactor',
            'rlnCtfScalefactor', 'rlnPhaseShift',
            'rlnGroupNumber', 'rlnAngleRot',
            'rlnAngleTilt', 'rlnOriginXAngst',
            'rlnOriginYAngst', 'rlnNormCorrection',
            'rlnLogLikeliContribution', 'rlnMaxValueProbDistribution',
            'rlnNrOfSignificantSamples', 'rlnRandomSubset',
        ]

        # Names that will be removed from the table
        dropped = [
            'rlnOriginXAngst', 'rlnOriginYAngst', 'rlnNormCorrection',
            'rlnAnglePsi', 'rlnMaxValueProbDistribution',
        ]

        # Before the removal: every column is there, in the expected order
        self.assertEqual(allColumns, particles.getColumnNames())
        self.assertTrue(particles.hasAllColumns(allColumns))

        particles.removeColumns(dropped)

        # After the removal: the survivors keep their relative order...
        remaining = list(filter(lambda name: name not in dropped, allColumns))
        self.assertEqual(remaining, particles.getColumnNames())
        # ...and none of the dropped names is reported anymore
        self.assertFalse(particles.hasAnyColumn(dropped))
Beispiel #2
0
    def test_addColumns(self):
        """ Check that addColumns appends constant and copied columns.

        The 'sampling_directions' table is expected to start with only
        'rlnAngleRot' and 'rlnAngleTilt'. Four columns are then added: two
        with a constant value and two defined from an existing column. The
        test verifies the resulting column order, that the copied columns
        match 'rlnAngleRot', that the constant column holds 1000 in every
        row, and finally that the table can be written to a star file.
        """
        # Imported locally so this test does not depend on the module
        # already importing them.
        import os
        import tempfile

        # Write into a private temporary directory: it works on platforms
        # without /tmp, cannot collide with other runs and is removed when
        # the block exits.
        with tempfile.TemporaryDirectory() as tmpDir:
            tmpOutput = os.path.join(tmpDir, 'sampling.star')
            print("Checking addColumns to %s..." % tmpOutput)
            dataFile = testfile('star', 'refine3d', 'run_it016_sampling.star')
            table = Table(fileName=dataFile, tableName='sampling_directions')

            expectedCols = ['rlnAngleRot',
                            'rlnAngleTilt',
                            'rlnAnglePsi',
                            'rlnExtraAngle1',
                            'rlnExtraAngle2',
                            'rlnAnotherConst'
                            ]

            # Only the first two columns come from the input file
            self.assertEqual(expectedCols[:2], table.getColumnNames())

            table.addColumns('rlnAnglePsi=0.0',
                             'rlnExtraAngle1=rlnAngleRot',
                             'rlnExtraAngle2=rlnExtraAngle1',
                             'rlnAnotherConst=1000')

            self.assertEqual(expectedCols, table.getColumnNames())

            # Check values
            def _values(colName):
                return table.getColumnValues(colName)

            # Both extra angles must replicate rlnAngleRot (the second one
            # indirectly, through rlnExtraAngle1)
            for v1, v2, v3 in zip(_values('rlnAngleRot'),
                                  _values('rlnExtraAngle1'),
                                  _values('rlnExtraAngle2')):
                self.assertAlmostEqual(v1, v2)
                self.assertAlmostEqual(v1, v3)

            self.assertTrue(all(v == 1000
                                for v in _values('rlnAnotherConst')))

            table.write(tmpOutput, tableName='sampling_directions')
            # The write must have produced the output file
            self.assertTrue(os.path.exists(tmpOutput))
Beispiel #3
0
    def createFinalFilesStep(self):
        """ Collect the last-iteration results of every run level.

        For each run level returned by ``_getRLevList``:
          * the first row of its 'model_classes' metadata is taken, its
            reference image is copied to the map path given by
            ``_getMapById`` and the row (pointing to the copy) is appended
            to the final model metadata; the original reference image is
            registered in ``self._mapsDict`` with its map id;
          * every particle row of its data star file is appended to a single
            output table, with ``rlnClassNumber`` replaced by the run level.

        The merged particles are written to the 'finalData' file and the
        merged model rows to the 'finalModel' file.
        """
        # Destination of the merged model rows
        finalModelFn = self._getFileName('finalModel')
        finalModelMd = self._getMetadata()

        # Destination of the merged particles
        finalDataFn = self._getFileName('finalData')

        rawDataFn = self._getFileName('rawFinalData')
        print("FN: ", rawDataFn)
        # The raw table is loaded only to reuse its column layout
        layoutTable = Table(fileName=rawDataFn, tableName='particles')
        colNames = list(map(str, layoutTable.getColumnNames()))
        mergedTable = Table(columns=colNames, tableName='particles')

        for rLev in self._getRLevList():
            lastIt = self._lastIter(rLev)

            # --- model: first class row of this run level ---
            modelFn = self._getFileName('model', iter=lastIt,
                                        lev=self._level, rLev=rLev)
            classesMd = self._getMetadata('model_classes@' + modelFn)
            refLabel = md.RLN_MLMODEL_REF_IMAGE
            classRow = md.getFirstRow(classesMd)
            origMap = classRow.getValue(refLabel)

            mapId = self._getRunLevId(rLev=rLev)
            destMap = self._getMapById(mapId)
            # Point the row to the copy before storing it
            classRow.setValue(refLabel, destMap)
            copyFile(origMap, destMap)
            # Remember which id was given to the original reference image
            self._mapsDict[origMap] = mapId

            classRow.addToMd(finalModelMd)

            # --- particles: relabel every row with the run level ---
            dataFn = self._getFileName('data', iter=lastIt,
                                       lev=self._level, rLev=rLev)
            rowReader = Table()
            for particle in rowReader.iterRows(dataFn, tableName='particles'):
                relabeled = particle._replace(rlnClassNumber=rLev)
                mergedTable.addRow(*relabeled)

        self.writeStar(finalDataFn, mergedTable)
        finalModelMd.write('model_classes@' + finalModelFn)
Beispiel #4
0
    def mergeClassesStep(self):
        """ Merge the final classes by clustering their maps, or just copy
        the raw final files when grouping is disabled.

        Without grouping, 'rawFinalData' and 'rawFinalModel' are copied to
        'finalData' and 'finalModel'.

        With grouping, the final maps are clustered and every particle of
        'rawFinalData' gets the class ``labels[oldClass - 1] + 1``. One
        'input_star' file per resulting class is written for the current
        level, together with the optics table of the level-1 input star.
        If the raw file has no particle rows, no star file is written.
        """
        if not self.doGrouping:
            # Nothing to merge: the raw results become the final ones
            prevData = self._getFileName('rawFinalData')
            finalData = self._getFileName('finalData')
            prevModel = self._getFileName('rawFinalModel')
            finalModel = self._getFileName('finalModel')
            copyFile(prevData, finalData)
            copyFile(prevModel, finalModel)
            return

        from cryomethods.functions import NumpyImgHandler
        npIh = NumpyImgHandler()
        makePath(self._getLevelPath(self._level))
        listVol = self._getFinalMaps()
        matrix = npIh.getAllNpList(listVol, 2)
        labels = self._clusteringData(matrix)

        prevStar = self._getFileName('rawFinalData')
        origStar = self._getFileName('input_star', lev=1, rLev=1)
        opticsTable = Table(fileName=origStar, tableName='optics')
        print("OPTABLE: ", origStar, opticsTable.size())

        # All output tables share the layout of the input one, so the
        # column names are read a single time.
        tableIn = Table(fileName=prevStar, tableName='particles')
        cols = [str(c) for c in tableIn.getColumnNames()]

        # One output table per merged class. Rows arrive sorted by their
        # OLD class number and nothing visible here guarantees that the
        # cluster labels follow that order, so the same new class may show
        # up in several non-adjacent runs of rows. Accumulating per class
        # (instead of writing whenever the class changes) keeps all of them
        # in the same file rather than overwriting it.
        classTables = {}
        pTable = Table()
        for row in pTable.iterRows(prevStar, key="rlnClassNumber",
                                   tableName='particles'):
            # Class numbers are 1-based, labels are indexed from 0
            newClass = labels[row.rlnClassNumber - 1] + 1
            ouTable = classTables.get(newClass)
            if ouTable is None:
                ouTable = Table(columns=cols, tableName='particles')
                classTables[newClass] = ouTable
            ouTable.addRow(*row._replace(rlnClassNumber=newClass))

        for newClass, ouTable in classTables.items():
            fn = self._getFileName('input_star', lev=self._level,
                                   rLev=newClass)
            print("mergeClassesStep ouTable.size: ", ouTable.size())
            self.writeStar(fn, ouTable, opticsTable)
Beispiel #5
0
    def _mergeDataStar(self, rLev, callback):
        """ Distribute the particles of a run level between the 'final' and
        the 'continuing' star files.

        The data star file of the last iteration of ``rLev`` is read. For
        each particle, ``callback(iters, rLev, classNumber)`` gives a map
        that is looked up in ``self._mapsDict`` to get a map id. When the
        particle's own class is not registered, the classes from 1 to
        ``numberOfClasses`` are tried in order and the first registered one
        is used.

        If ``self.stopDict[mapId]`` is true the particle is appended to the
        'rawFinalData' table with the class id from ``self._clsIdDict``;
        otherwise it is appended to the 'outputData' table with the class id
        parsed from the part of the map id after the dot. Rows already
        present in either file are kept, and a file is only written when
        its table is not empty.

        :param rLev: run level whose particles are merged.
        :param callback: callable ``(iteration, rLev, classNumber) -> map``.
        :raises RuntimeError: if no class of the run level resolves to a
            registered map.
        """
        def _getMapId(rMap):
            # Unknown maps give None so that other classes can be tried
            try:
                return self._mapsDict[rMap]
            except KeyError:
                return None

        iters = self._lastIter(rLev)
        # Star file with all the particles that continue to the next level
        outData = self._getFileName('outputData', lev=self._level)
        # Star file with all the final particles
        finalData = self._getFileName('rawFinalData')
        imgStar = self._getFileName('data', iter=iters,
                                    lev=self._level, rLev=rLev)
        # The keyword must be spelled 'fileName', as in every other Table
        # call; NOTE(review): a different spelling appears to be ignored by
        # Table, leaving the optics table empty - confirm against emtable.
        opTable = Table(fileName=imgStar, tableName='optics')
        tableIn = Table(fileName=imgStar, tableName='particles')
        print("IMGSTAR: ", imgStar, "PARTS: ", tableIn.size())
        cols = [str(c) for c in tableIn.getColumnNames()]
        outTable = Table(columns=cols, tableName='particles')
        finalTable = Table(columns=cols, tableName='particles')

        # Keep what previous calls already stored in both files
        if os.path.exists(outData):
            print("Exists ", outData)
            tmpTable = Table()
            for row in tmpTable.iterRows(outData, tableName='particles'):
                outTable.addRow(*row)

        if os.path.exists(finalData):
            print("Exists ", finalData)
            tpTable = Table()
            for row in tpTable.iterRows(finalData, tableName='particles'):
                finalTable.addRow(*row)

        pTable = Table()
        for row in pTable.iterRows(imgStar, key="rlnClassNumber",
                                   tableName='particles'):
            clsPart = row.rlnClassNumber
            mapId = _getMapId(callback(iters, rLev, clsPart))

            if mapId is None:
                # Fall back to the first class that has a registered map.
                # A single pass is enough: repeating it cannot give a
                # different answer and would never terminate.
                for clsPart in range(1, self.numberOfClasses.get() + 1):
                    mapId = _getMapId(callback(iters, rLev, clsPart))
                    if mapId is not None:
                        break
                else:
                    raise RuntimeError(
                        "No registered map found for any class of run "
                        "level %s at iteration %s" % (rLev, iters))

            if self.stopDict[mapId]:
                classId = self._clsIdDict[mapId]
                newRow = row._replace(rlnClassNumber=classId)
                finalTable.addRow(*newRow)
            else:
                # The class id is the part of the map id after the dot
                classId = int(mapId.split('.')[1])
                newRow = row._replace(rlnClassNumber=classId)
                outTable.addRow(*newRow)

        if finalTable.size() != 0:
            print("finalTable.size: ", finalTable.size())
            self.writeStar(finalData, finalTable)

        if outTable.size() != 0:
            print("outTable.size: ", outTable.size())
            self.writeStar(outData, outTable, opTable)
def run_job(project_dir, args):
    """ Run cryoassess on 2D class averages and keep particles of good classes.

    Steps:
      1. Derive the '_model.star' file name from ``args.in_parts`` and read
         the class averages stack and the number of classes from its
         'model_classes' table.
      2. Launch cryoassess through the shell and fail if it returns a
         non-zero code.
      3. Collect the good class ids from the 'output/Good/particle_*.jpg'
         file names.
      4. Write 'particles_for_training.star' (optics + particles whose
         ``rlnClassNumber`` is a good class) into the job directory and a
         'backup_selection.star' with one 0/1 ``rlnSelected`` row per class.

    If no good class is found, the Relion failure marker file is created
    and the process exits with status 1.

    :param project_dir: base directory every relative path is joined to.
    :param args: parsed arguments; ``in_parts``, ``out_dir``,
        ``batch_size`` and ``gpu`` are read.
    :raises Exception: if the cryoassess command returns a non-zero code.
    """
    # Local import: sys.exit is used instead of the interactive-only
    # exit() helper installed by the site module.
    import sys

    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    batch = args.batch_size
    gpu = args.gpu

    def getPath(*arglist):
        return os.path.join(project_dir, *arglist)

    def _printDuration():
        # Elapsed time since the job started, split into h/min/sec
        diff = time.time() - start
        print("Job duration = %dh %dmin %dsec \n" %
              (diff // 3600, diff // 60 % 60, diff % 60))

    # Reading the model star file from relion
    modelstar = in_parts.replace("_data.star", "_model.star")
    refstable = Table(fileName=getPath(modelstar), tableName='model_classes')
    # Reference images look like '<index>@<stack>': the stack is taken from
    # the first row, the number of classes from the index of the last row
    refstack = refstable[0].rlnReferenceImage.split("@")[-1]
    nrCls = int(refstable[-1].rlnReferenceImage.split("@")[0])

    if DEBUG:
        print("Found input class averages stack: %s" % refstack)

    # Launching cryoassess
    args_dict = {
        '-i': getPath(refstack),
        '-o': getPath(job_dir, 'output'),
        '-b': batch,
        '-m': CRYOASSESS_2D_MODEL,
    }
    cmd = "%s && CUDA_VISIBLE_DEVICES=%s %s " % (CONDA_ENV, gpu, CRYOASSESS_2D)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    # The shell is needed because the command chains the environment
    # activation with '&&'
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Parse output to get good classes IDs
    goodTemplate = getPath(job_dir, "output/Good/particle_*.jpg")
    # Raw string: '\d' and '\.' are regex escapes, not string escapes.
    # At least one digit is required so int() always gets a number.
    regex = re.compile(r'particle_(\d+)\.jpg')
    goodcls = []
    for fn in glob(goodTemplate):
        match = regex.search(os.path.basename(fn))
        # Files matching the glob but not carrying a numeric id are skipped
        if match is not None:
            goodcls.append(int(match.group(1)))

    if DEBUG:
        print("Parsing output files: %s\nGood classes: %s" %
              (goodTemplate, goodcls))

    if not goodcls:
        print("No good classes found. Job stopped.")
        _printDuration()
        open(RELION_JOB_FAILURE_FILENAME, "w").close()
        sys.exit(1)

    # Set for constant-time membership tests in the loops below
    goodSet = set(goodcls)

    # Create output star file for Relion to use
    optics = Table(fileName=getPath(in_parts), tableName='optics')
    ptcls = Table(fileName=getPath(in_parts), tableName='particles')
    cols = ptcls.getColumnNames()
    out_ptcls = Table(columns=cols)

    for row in ptcls:
        if row.rlnClassNumber in goodSet:
            out_ptcls.addRow(*row)

    if DEBUG:
        print("Input particles: %d\nOutput particles: %d" %
              (len(ptcls), len(out_ptcls)))

    out_star = getPath(job_dir, "particles_for_training.star")
    with open(out_star, "w") as f:
        optics.writeStar(f, tableName="optics")
        out_ptcls.writeStar(f, tableName="particles")

    # Create backup_selection.star for results visualization
    # NOTE(review): this file is created under project_dir, not job_dir -
    # confirm that this is the intended location.
    sel = Table(columns=['rlnSelected'])
    for i in range(1, nrCls + 1):
        sel.addRow(1 if i in goodSet else 0)
    with open(getPath("backup_selection.star"), "w") as f:
        sel.writeStar(f, tableName="")

    _printDuration()
Beispiel #7
0
    def convertInputStep(self, resetDeps, copyAlignment):
        """ Create the input files in STAR format for the current level.

        * Level 0: the input is converted to a star file and the input
          volumes are converted. Then, for i = 9 down to 2, a random subset
          of the particles is written to the 'input_star' file of run level
          i. Its size is ``10000 * i`` when there are at least 100000
          particles, otherwise ``int(0.1 * i * total)``. Each subset is
          sampled from the previous (larger) one.
        * Level 1: the input is converted to the star file of run level 1
          and the input volumes are converted.
        * Any other level: the 'outputData' star file of the previous level
          is split by ``rlnClassNumber``; each class gets its own run
          directory and 'input_star' file, written with the optics table of
          the level-1 input star. Nothing is written if that file has no
          particle rows.

        :param resetDeps: not used inside this method.
        :param copyAlignment: forwarded to ``_convertStar``.
        """
        import random

        if self._level == 0:
            makePath(self._getRunPath(self._level, 1))
            imgStar = self._getFileName('input_star', lev=self._level, rLev=0)
            self._convertStar(copyAlignment, imgStar)
            opticsTable = Table(fileName=imgStar, tableName='optics')
            partsTable = Table(fileName=imgStar, tableName='particles')
            self._convertVol(ImageHandler(), self.inputVolumes.get())
            mdSize = partsTable.size()

            for i in range(9, 1, -1):
                makePath(self._getRunPath(self._level, i))
                mStar = self._getFileName('input_star', lev=self._level, rLev=i)
                size = 10000 * i if mdSize >= 100000 else int(mdSize * 0.1 * i)
                print("partsTable: ", size, i, mdSize)
                # The table rows are replaced in place, so the next (smaller)
                # subset is sampled from this one
                partsTable._rows = random.sample(partsTable._rows, k=size)
                self.writeStar(mStar, partsTable, opticsTable)

        elif self._level == 1:
            imgStar = self._getFileName('input_star', lev=self._level, rLev=1)
            makePath(self._getRunPath(self._level, 1))
            self._convertStar(copyAlignment, imgStar)

            # TODO: find a clever way to avoid volume conversion if it is
            # already done.
            self._convertVol(ImageHandler(), self.inputVolumes.get())
        else:
            prevStar = self._getFileName('outputData', lev=self._level - 1)
            firstStarFn = self._getFileName('input_star', lev=1, rLev=1)
            opTable = Table(fileName=firstStarFn, tableName='optics')

            tableIn = Table(fileName=prevStar, tableName='particles')
            cols = [str(c) for c in tableIn.getColumnNames()]

            # State of the class currently being collected; it stays None
            # when the previous star file has no particle rows
            lastCls = None
            fn = None
            newPTable = None

            pTable = Table()
            # Rows come sorted by class, so a change in the class number
            # means the previous class is complete
            for row in pTable.iterRows(prevStar, key="rlnClassNumber",
                                       tableName='particles'):
                clsPart = row.rlnClassNumber
                if clsPart != lastCls:
                    if lastCls is not None:
                        print("writing %s" % fn)
                        self.writeStar(fn, newPTable, opTable)
                    paths = self._getRunPath(self._level, clsPart)
                    makePath(paths)
                    print ("Path: %s and newRlev: %d" % (paths, clsPart))
                    lastCls = clsPart
                    newPTable = Table(columns=cols, tableName='particles')
                    fn = self._getFileName('input_star', lev=self._level,
                                           rLev=clsPart)
                newPTable.addRow(*row)

            # Write the last class, if any row was read at all
            if newPTable is not None:
                print("writing %s and ending the loop" % fn)
                self.writeStar(fn, newPTable, opTable)
def run_job(project_dir, args):
    """ Run cinderella on 2D class averages and keep particles of good classes.

    Steps:
      1. Derive the '_model.star' file name from ``args.in_parts`` and read
         the class averages stack and the number of classes from its
         'model_classes' table.
      2. Launch cinderella through the shell and fail if it returns a
         non-zero code.
      3. Read '<stack>_index_confidence.txt' from the job's 'output' folder:
         each line holds an index and a confidence; lines are consumed while
         the confidence is above the threshold and ``index + 1`` is stored
         as a good class.
      4. Write 'particles_for_training.star' (optics + particles whose
         ``rlnClassNumber`` is a good class) into the job directory and a
         'backup_selection.star' with one 0/1 ``rlnSelected`` row per class.

    If no good class is found, the Relion failure marker file is created
    and the process exits with status 1.

    :param project_dir: base directory every relative path is joined to.
    :param args: parsed arguments; ``in_parts``, ``out_dir``, ``threshold``,
        ``model`` and ``gpu`` are read. A ``model`` equal to the string
        "None" selects the general model.
    :raises Exception: if the cinderella command returns a non-zero code.
    """
    # Local import: sys.exit is used instead of the interactive-only
    # exit() helper installed by the site module.
    import sys

    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    thresh = args.threshold
    model = args.model
    gpus = args.gpu

    def getPath(*arglist):
        return os.path.join(project_dir, *arglist)

    def _printDuration():
        # Elapsed time since the job started, split into h/min/sec
        diff = time.time() - start
        print("Job duration = %dh %dmin %dsec \n" %
              (diff // 3600, diff // 60 % 60, diff % 60))

    if model == "None":
        model = CINDERELLA_GEN_MODEL
    else:
        model = getPath(model)

    # Reading the model star file from relion
    modelstar = in_parts.replace("_data.star", "_model.star")
    refstable = Table(fileName=getPath(modelstar), tableName='model_classes')
    # Reference images look like '<index>@<stack>': the stack is taken from
    # the first row, the number of classes from the index of the last row
    refstack = refstable[0].rlnReferenceImage.split("@")[-1]
    nrCls = int(refstable[-1].rlnReferenceImage.split("@")[0])

    if DEBUG:
        print("Found input class averages stack: %s" % refstack)

    # Launching cinderella
    # NOTE(review): '-o' is relative while the results are read back from
    # job_dir/output below - presumably the working directory is the job
    # directory; confirm against the caller.
    args_dict = {
        '-i': getPath(refstack),
        '-o': 'output',
        '-w': model,
        '--gpu': gpus,
        '-t': thresh,
    }
    cmd = "%s && %s " % (CONDA_ENV, CINDERELLA_PREDICT)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    # The shell is needed because the command chains the environment
    # activation with '&&'
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Parse output to get good classes IDs
    outfn = os.path.basename(refstack.replace(".mrcs", "_index_confidence.txt"))
    outpath = getPath(job_dir, "output", outfn)
    goodcls = []
    with open(outpath, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines carry no class information
                continue
            if float(fields[1]) > thresh:
                # The stored class id is the file index plus one
                goodcls.append(int(fields[0]) + 1)
            else:
                # Stop at the first line not above the threshold
                # (presumably the file is sorted by confidence - verify)
                break

    if DEBUG:
        print("Parsing output file: %s\nGood classes: %s" % (outpath, goodcls))

    if not goodcls:
        print("No good classes found. Job stopped.")
        _printDuration()
        open(RELION_JOB_FAILURE_FILENAME, "w").close()
        sys.exit(1)

    # Set for constant-time membership tests in the loops below
    goodSet = set(goodcls)

    # Create output star file for Relion to use
    optics = Table(fileName=getPath(in_parts), tableName='optics')
    ptcls = Table(fileName=getPath(in_parts), tableName='particles')
    cols = ptcls.getColumnNames()
    out_ptcls = Table(columns=cols)

    for row in ptcls:
        if row.rlnClassNumber in goodSet:
            out_ptcls.addRow(*row)

    if DEBUG:
        print("Input particles: %d\nOutput particles: %d" %
              (len(ptcls), len(out_ptcls)))

    out_star = getPath(job_dir, "particles_for_training.star")
    with open(out_star, "w") as f:
        optics.writeStar(f, tableName="optics")
        out_ptcls.writeStar(f, tableName="particles")

    # Create backup_selection.star for results visualization
    # NOTE(review): this file is created under project_dir, not job_dir -
    # confirm that this is the intended location.
    sel = Table(columns=['rlnSelected'])
    for i in range(1, nrCls + 1):
        sel.addRow(1 if i in goodSet else 0)
    with open(getPath("backup_selection.star"), "w") as f:
        sel.writeStar(f, tableName="")

    _printDuration()