Beispiel #1
0
    def _showChanges(self, paramName=None):
        """ Collect the change values of every iteration and show them.

        For each iteration returned by self._getAllIters() whose optimiser
        file exists, the first row of its 'optimiser_general' table is read
        and the values of self.protocol.CHANGE_LABELS are stored, preceded by
        the iteration number, as a new row. The resulting table is written
        to the 'all_changes' file.

        Params:
            paramName: not used by this method.
        Returns:
            A list with a single data view of the written star file.
        """
        labels = ['rlnIterationNumber'] + self.protocol.CHANGE_LABELS
        tableChanges = Table(columns=labels)

        print(
            "Computing average changes in offset, angles, and class membership"
        )
        for it in self._getAllIters():
            fn = self.protocol._getFileName('optimiser', iter=it)
            # Iterations without an optimiser file are silently skipped
            if not os.path.exists(fn):
                continue
            print("Computing data for iteration; %03d" % it)
            # fn was already resolved above, so it is not looked up again
            table = Table(fileName=fn, tableName='optimiser_general')
            row = table[0]
            cols = [
                getattr(row, value) for value in self.protocol.CHANGE_LABELS
            ]
            tableChanges.addRow(it, *cols)

        fn = self.protocol._getFileName('all_changes')

        with open(fn, 'w') as f:
            tableChanges.writeStar(f)

        return [self.createDataView(fn)]
Beispiel #2
0
    def _showPMax(self, paramName=None):
        """ Build the table of average PMax per iteration and plot it.

        The first row of the 'model_general' table of every iteration
        (except iteration 1) provides rlnAveragePmax and rlnLogLikelihood.
        The collected rows are written to the 'all_avgPmax' file.

        Params:
            paramName: not used by this method.
        Returns:
            A list with the data view of the written file and the plotter.
        """
        tablePMax = Table(columns=['rlnIterationNumber',
                                   'rlnAveragePmax',
                                   'rlnLogLikelihood'])

        for iteration in self._getAllIters():
            # iter1 is left out since its Pmax is 1
            if iteration != 1:
                # the model file of the first prefix is the one listed
                modelKey = self.protocol.PREFIXES[0] + 'model'
                modelFn = self.protocol._getFileName(modelKey, iter=iteration)
                general = Table(fileName=modelFn,
                                tableName='model_general')[0]
                avgPmax = float(general.rlnAveragePmax)
                logLikelihood = float(general.rlnLogLikelihood)
                tablePMax.addRow(int(iteration), avgPmax, logLikelihood)

        outFn = self.protocol._getFileName('all_avgPmax')
        with open(outFn, 'w') as starFile:
            tablePMax.writeStar(starFile)

        plotter = RelionPlotter()
        plotter.createSubPlot("Avg PMax per Iterations", "Iterations",
                              "Avg PMax")
        plotter.plotMd(tablePMax, 'rlnIterationNumber', 'rlnAveragePmax')
        plotter.showLegend(['rlnAveragePmax'])

        dataView = self.createDataView(outFn)
        return [dataView, plotter]
 def _write(self, f):
     """ Write every stored row as an 'optics' star table into f.

     Params:
         f: open file object where the table is written.
     """
     # Names and types of the columns are taken from the first row
     firstRow = self.first()._asdict()
     columns = [Table.Column(name, type(value))
                for name, value in firstRow.items()]
     opticsTable = Table(columns=columns)
     for opticsGroup in self._dict.values():
         opticsTable.addRow(*opticsGroup)
     opticsTable.writeStar(f, tableName='optics')
 def convertInputStep(self, newMics, numPass):
     """ Write the micrographs star file used as input for cryoassess.

     Params:
         newMics: iterable of micrographs; the absolute path of each file
             name becomes one rlnMicrographName row.
         numPass: value passed on to self.getInputFilename() and
             self.appendTotalInputStar().
     """
     micsTable = Table(columns=['rlnMicrographName'])
     for mic in newMics:
         micPath = os.path.abspath(mic.getFileName())
         micsTable.addRow(micPath)

     starFn = self.getInputFilename(numPass)
     with open(starFn, 'w') as starFile:
         starFile.write("# Star file generated with Scipion\n")
         # The table is written with an empty table name
         micsTable.writeStar(starFile, tableName='')

     self.appendTotalInputStar(numPass)
Beispiel #5
0
def writeCoordsConfig(configFn, boxSize, state):
    """ Write the config.xmd file needed by the Xmipp picker.

    A one-row 'properties' table is stored in the given file.

    Params:
        configFn: the filename where the configuration is stored.
        boxSize: the box size in pixels for extraction (stored as int).
        state: picker state.
    """
    print("writeCoordsConfig: state=", state)
    columns = ['particleSize', 'pickingState']
    configTable = Table(columns=columns)
    configTable.addRow(int(boxSize), state)
    configTable.write(configFn, tableName='properties')
Beispiel #6
0
    def test_write_singleRow(self):
        """ Exercise writeStar(singleRow=True) on a read and a new table. """
        fn = '/tmp/test-single-row.star'
        print("Writing a single row to %s..." % fn)

        # First case: table read from a star string, echoed to stdout
        shiftTable = Table()
        starInput = StringIO(one_micrograph_mc)
        shiftTable.readStar(starInput, tableName='global_shift')
        shiftTable.writeStar(sys.stdout,
                             tableName='global_shift',
                             singleRow=True)

        # Second case: table created from scratch, written to a file
        columns = ['rlnImageSizeX', 'rlnImageSizeY', 'rlnMicrographMovieName']
        movieTable = Table(columns=columns)
        movieName = 'Movies/14sep05c_00024sq_00003hl_00002es.frames.out.mrc'
        movieTable.addRow(3710, 3838, movieName)

        with open(fn, 'w') as outFile:
            movieTable.writeStar(outFile, singleRow=True)
    def addColumns(self, **kwargs):
        """ Add new columns filled with a default value in every row.

        Each keyword argument defines one new column: the key is the column
        name and the value is the default, whose type becomes the column type.
        """
        # Existing columns, with name and type taken from the first row
        cols = [Table.Column(name, type(value))
                for name, value in self.first()._asdict().items()]
        # New columns go after the existing ones
        cols.extend(Table.Column(name, type(default))
                    for name, default in kwargs.items())

        newTable = Table(columns=cols)

        for og in self._dict.values():
            rowValues = og._asdict()
            rowValues.update(kwargs)
            newTable.addRow(**rowValues)

        self.__fromTable(newTable)
Beispiel #8
0
    def createFinalFilesStep(self):
        """ Gather the results of every run level into the final files.

        For each run level in self._getRLevList(), the reference image of the
        first 'model_classes' row of its last iteration is copied to a new map
        location and that row is appended to the final model metadata. All the
        particles of that run level are added to a single output table with
        rlnClassNumber set to the run level. The particles are written to
        'finalData' and the model rows to 'model_classes' in 'finalModel'.
        """
        # -----metadata to save all final models-------
        finalModel = self._getFileName('finalModel')
        finalModelMd = self._getMetadata()

        # -----metadata to save all final particles-----
        finalData = self._getFileName('finalData')

        # The raw final data is only read to get the column names
        fn = self._getFileName('rawFinalData')
        print("FN: ", fn)
        tableIn = Table(fileName=fn, tableName='particles')
        cols = [str(c) for c in tableIn.getColumnNames()]
        ouTable = Table(columns=cols, tableName='particles')

        for rLev in self._getRLevList():
            it = self._lastIter(rLev)
            modelFn = self._getFileName('model', iter=it,
                                        lev=self._level, rLev=rLev)
            modelMd = self._getMetadata('model_classes@' + modelFn)

            refLabel = md.RLN_MLMODEL_REF_IMAGE
            imgRow = md.getFirstRow(modelMd)
            # fn is reused here: from now on it is the reference image path
            fn = imgRow.getValue(refLabel)

            mapId = self._getRunLevId(rLev=rLev)
            newMap = self._getMapById(mapId)
            # The row is updated to point to the copied map
            imgRow.setValue(refLabel, newMap)
            copyFile(fn, newMap)
            # Remember which id was given to the original reference image
            self._mapsDict[fn] = mapId

            imgRow.addToMd(finalModelMd)

            dataFn = self._getFileName('data', iter=it,
                                       lev=self._level, rLev=rLev)

            pTable = Table()
            for row in pTable.iterRows(dataFn, tableName='particles'):
                # Every particle of this run level gets rLev as class number
                newRow = row._replace(rlnClassNumber=rLev)
                ouTable.addRow(*newRow)

        self.writeStar(finalData, ouTable)
        finalModelMd.write('model_classes@' + finalModel)
Beispiel #9
0
    def mergeClassesStep(self):
        """ Regroup the final particles by clustered class, or copy them.

        When self.doGrouping is set, the final maps are clustered and each
        particle of 'rawFinalData' gets a new class number taken from the
        cluster label of its current class. Particles are iterated with
        rlnClassNumber as key and one 'input_star' file is written for every
        run of consecutive rows that share the same new class.
        Otherwise the raw final data and model files are copied to the final
        ones.
        """
        if self.doGrouping:
            from cryomethods.functions import NumpyImgHandler
            npIh = NumpyImgHandler()
            makePath(self._getLevelPath(self._level))
            listVol = self._getFinalMaps()
            matrix = npIh.getAllNpList(listVol, 2)
            labels = self._clusteringData(matrix)

            clsChange = 0  # 0 means that no class has been started yet
            prevStar = self._getFileName('rawFinalData')
            pTable = Table()
            origStar = self._getFileName('input_star', lev=1, rLev=1)
            opticsTable = Table(fileName=origStar, tableName='optics')
            print("OPTABLE: ", origStar, opticsTable.size())

            # Every output table has the same columns, so the input star file
            # is read once here instead of once per class change.
            tableIn = Table(fileName=prevStar, tableName='particles')
            cols = [str(c) for c in tableIn.getColumnNames()]
            del tableIn  # only the column names are needed

            for row in pTable.iterRows(prevStar, key="rlnClassNumber",
                                       tableName='particles'):
                clsPart = row.rlnClassNumber
                # labels is indexed by class number - 1; new classes start
                # at 1
                newClass = labels[clsPart - 1] + 1
                newRow = row._replace(rlnClassNumber=newClass)

                if newClass != clsChange:
                    # Flush the table of the previous class before starting
                    # a new one
                    if clsChange != 0:
                        self.writeStar(fn, ouTable, opticsTable)
                    clsChange = newClass
                    fn = self._getFileName('input_star', lev=self._level,
                                           rLev=newClass)
                    ouTable = Table(columns=cols, tableName='particles')
                ouTable.addRow(*newRow)
            print("mergeClassesStep ouTable.size: ", ouTable.size())
            # The table of the last class is still pending
            self.writeStar(fn, ouTable, opticsTable)

        else:
            prevData = self._getFileName('rawFinalData')
            finalData = self._getFileName('finalData')
            prevModel = self._getFileName('rawFinalModel')
            finalModel = self._getFileName('finalModel')
            copyFile(prevData, finalData)
            copyFile(prevModel, finalModel)
Beispiel #10
0
    def test_addRows(self):
        """ Check that addRow grows the table and keeps the row length. """
        print("Checking addRows...")
        table = Table()
        table.readStar(StringIO(particles_3d_classify))

        nRows = len(table)
        lastRow = table[-1]

        values = [378.000000, 2826.000000, 5.360625, 4, -87.35289,
                  "000100@Extract/job012/Movies/20170629_00021_frameImage.mrcs",
                  "MotionCorr/job002/Movies/20170629_00021_frameImage.mrc",
                  1, 4.809192, 0.131159, 10864.146484, 10575.793945, 77.995003, 0.000000,
                  1.000000, 0.000000, 1, 81.264321, 138.043147, 4.959233, -2.12077,
                  0.798727, 10937.130965, 0.998434, 5
                  ]

        # The same row is appended three times
        nAdded = 3
        for _ in range(nAdded):
            values[4] = nRows + 1
            table.addRow(*values)

        self.assertEqual(nRows + 3, len(table))
        newLastRow = table[-1]
        self.assertEqual(len(lastRow), len(newLastRow))
Beispiel #11
0
    def _mergeDataStar(self, rLev, callback):
        """ Distribute the particles of a run level into final and output data.

        The particles of the last iteration of rLev are iterated with
        rlnClassNumber as key. The map id of each particle is resolved through
        callback and self._mapsDict. Particles whose map is flagged in
        self.stopDict are appended to 'rawFinalData' with the class id from
        self._clsIdDict; the remaining ones are appended to 'outputData' with
        the class id parsed from the map id. Rows already stored in those two
        files are kept.

        Params:
            rLev: run level whose particles are merged.
            callback: callable invoked as callback(iters, rLev, clsPart); its
                result is looked up in self._mapsDict.
        """
        def _getMapId(rMap):
            # Maps that were never registered have no id
            try:
                return self._mapsDict[rMap]
            except (KeyError, TypeError):
                return None

        iters = self._lastIter(rLev)
        # metadata to save all particles that continues
        outData = self._getFileName('outputData', lev=self._level)
        # metadata to save all final particles
        finalData = self._getFileName('rawFinalData')
        imgStar = self._getFileName('data', iter=iters,
                                    lev=self._level, rLev=rLev)
        # The keyword must be 'fileName' (as in every other Table call),
        # otherwise the optics table is not read from the star file.
        opTable = Table(fileName=imgStar, tableName='optics')
        tableIn = Table(fileName=imgStar, tableName='particles')
        print("IMGSTAR: ", imgStar, "PARTS: ", tableIn.size())
        cols = [str(c) for c in tableIn.getColumnNames()]
        outTable = Table(columns=cols, tableName='particles')
        finalTable = Table(columns=cols, tableName='particles')

        # Rows written by previous calls are loaded first, since both files
        # are rewritten completely at the end
        if os.path.exists(outData):
            print("Exists ", outData)
            tmpTable = Table()
            for row in tmpTable.iterRows(outData, tableName='particles'):
                outTable.addRow(*row)

        if os.path.exists(finalData):
            print("Exists ", finalData)
            tpTable = Table()
            for row in tpTable.iterRows(finalData, tableName='particles'):
                finalTable.addRow(*row)

        pTable = Table()
        for row in pTable.iterRows(imgStar, key="rlnClassNumber",
                                   tableName='particles'):
            clsPart = row.rlnClassNumber
            rMap = callback(iters, rLev, clsPart)
            mapId = _getMapId(rMap)

            # Fall back to the first class whose map is registered.
            # NOTE(review): this keeps retrying until some class resolves, so
            # it presumably never ends if no class map is registered - confirm
            while mapId is None:
                for clsPart in range(1, self.numberOfClasses.get()+1):
                    rMap = callback(iters, rLev, clsPart)
                    mapId = _getMapId(rMap)
                    if mapId is not None:
                        break

            if self.stopDict[mapId]:
                classId = self._clsIdDict[mapId]
                newRow = row._replace(rlnClassNumber=classId)
                finalTable.addRow(*newRow)
            else:
                # The class id is the part after the dot in the map id
                classId = int(mapId.split('.')[1])
                newRow = row._replace(rlnClassNumber=classId)
                outTable.addRow(*newRow)

        if finalTable.size() != 0:
            print("finalTable.size: ", finalTable.size())
            self.writeStar(finalData, finalTable)

        if outTable.size() != 0:
            print("outTable.size: ", outTable.size())
            self.writeStar(outData, outTable, opTable)
    def convertInputStep(self, movId, partId, postId):
        """ Write the star files for the input movies and particles.

        One star file is written per movie with a single-row 'general' table,
        a 'global_shift' table and, when the first movie has the
        '_rlnMotionModelCoeff' attribute, a 'local_motion_model' table.
        The list of movies with their star files is written to
        'input_corrected_micrographs.star' and the particles are converted
        to 'input_particles.star'.

        Params:
            movId, partId, postId: not used in the body of this method.
        """
        inputMovies = self.inputMovies.get()
        inputParts = self.inputParticles.get()
        imgStar = self._getPath('input_particles.star')
        inputPartsFolder = self._getInputPath('particles')
        pwutils.makePath(inputPartsFolder)

        self.info("Converting set from '%s' into '%s'" %
                  (inputParts.getFileName(), imgStar))

        tableMovies = Table(
            columns=['rlnMicrographName', 'rlnMicrographMetadata'])
        tableGeneral = Table(columns=[
            'rlnImageSizeX', 'rlnImageSizeY', 'rlnImageSizeZ',
            'rlnMicrographMovieName', 'rlnMicrographBinning',
            'rlnMicrographOriginalPixelSize', 'rlnMicrographDoseRate',
            'rlnMicrographPreExposure', 'rlnVoltage',
            'rlnMicrographStartFrame', 'rlnMotionModelVersion'
        ])
        tableShifts = Table(columns=[
            'rlnMicrographFrameNumber', 'rlnMicrographShiftX',
            'rlnMicrographShiftY'
        ])
        tableCoeffs = Table(
            columns=['rlnMotionModelCoeffsIdx', 'rlnMotionModelCoeff'])

        # Create the first row, later only the movieName will be updated
        xdim, ydim, ndim = inputMovies.getDim()
        acq = inputMovies.getAcquisition()
        firstMovie = inputMovies.getFirstItem()
        a0, aN = firstMovie.getAlignment().getRange()
        moviesPixelSize = inputMovies.getSamplingRate()
        binningFactor = inputParts.getSamplingRate() / moviesPixelSize
        # Whether local motion is written is decided from the first movie only
        hasLocal = firstMovie.hasAttribute('_rlnMotionModelCoeff')
        motionMode = 1 if hasLocal else 0

        # 'movieName' is a placeholder that is replaced for every movie
        tableGeneral.addRow(xdim, ydim, ndim, 'movieName',
                            binningFactor, moviesPixelSize,
                            acq.getDosePerFrame(), acq.getDoseInitial(),
                            acq.getVoltage(), a0, motionMode)
        row = tableGeneral[0]

        for movie in inputMovies:
            movieFn = movie.getFileName()
            movieBase = os.path.basename(movieFn)
            movieStar = self._getInputPath(
                pwutils.replaceBaseExt(movieFn, 'star'))
            tableMovies.addRow(movieBase, movieStar)
            # All the tables of a movie go into the same star file
            with open(movieStar, 'w') as f:
                # Update Movie name
                tableGeneral[0] = row._replace(rlnMicrographMovieName=movieFn)
                tableGeneral.writeStar(f, tableName='general', singleRow=True)
                # Write shifts
                tableShifts.clearRows()
                alignment = movie.getAlignment()
                shiftsX, shiftsY = alignment.getShifts()
                a0, aN = alignment.getRange()
                # Value stored for the frames outside the alignment range
                empty = -9999.000
                # Frames before the first aligned one
                for i in range(1, a0):
                    tableShifts.addRow(i, empty, empty)
                # Adjust the shifts to be relative to the first frame
                # so let's add the opposite value
                xoff, yoff = -shiftsX[0], -shiftsY[0]
                for i in range(a0, aN + 1):
                    tableShifts.addRow(i, shiftsX[i - a0] + xoff,
                                       shiftsY[i - a0] + yoff)
                # Frames after the last aligned one, up to ndim
                for i in range(aN + 1, ndim + 1):
                    tableShifts.addRow(i, empty, empty)
                tableShifts.writeStar(f, tableName='global_shift')

                # Write coefficients
                if hasLocal:
                    # The coefficients are stored as a JSON string
                    coeffs = movie.getAttributeValue('_rlnMotionModelCoeff',
                                                     '')
                    tableCoeffs.clearRows()
                    for i, c in enumerate(json.loads(coeffs)):
                        tableCoeffs.addRow(i, c)
                    tableCoeffs.writeStar(f, tableName='local_motion_model')

        with open(self._getPath('input_corrected_micrographs.star'), 'w') as f:
            tableMovies.writeStar(f)

        convert.writeSetOfParticles(inputParts,
                                    imgStar,
                                    outputDir=inputPartsFolder,
                                    alignType=ALIGN_PROJ,
                                    fillMagnification=True)
Beispiel #13
0
def run_job(args):
    """ Run crYOLO prediction for a Relion external job.

    The function writes the crYOLO config file, launches the prediction
    command, moves the resulting coordinate files next to the job folder,
    writes 'autopick.star' and 'job_pipeline.star' and registers the output
    node. If 'output_for_relion.star' does not exist yet, the box size
    estimated by crYOLO is read from its size distribution summary and used
    to suggest picking parameters and to write '.gui_manualpickjob.star'.

    Params:
        args: object with the attributes in_mics, out_dir, threshold,
            box_size, model, filament, denoise, gpu and threads. When
            filament is set, box_distance and minimum_number_boxes are
            read as well.
    Raises:
        Exception: if the crYOLO command returns a non-zero code, or if the
            estimated box size cannot be read from the crYOLO summary.
    """
    start = time.time()
    in_mics = args.in_mics
    job_dir = args.out_dir
    thresh = args.threshold
    box_size = args.box_size
    distance = 0
    model = args.model
    filament = args.filament
    if filament:
        box_dist = args.box_distance
        min_boxes = args.minimum_number_boxes
    denoise = args.denoise
    gpus = args.gpu
    threads = args.threads

    if SCRATCH_DIR is not None:
        filtered_dir = os.path.join(SCRATCH_DIR, "filtered_tmp")
    else:
        filtered_dir = "%s/filtered_tmp/" % job_dir

    # The model arrives as the string "None" when it was not provided
    if model == "None":
        model = CRYOLO_GEN_MODEL if not denoise else CRYOLO_GEN_JANNI_MODEL
    else:
        model = os.path.abspath(model)

    # Making a cryolo config file
    json_dict = {
        "model": {
            "architecture": "PhosaurusNet",
            "input_size": 1024,
            "max_box_per_image": 600,
            "filter": [0.1, filtered_dir]
        },
        "other": {
            "log_path": "%s/logs/" % job_dir
        }
    }
    if box_size:  # is not 0
        json_dict["model"]["anchors"] = [int(box_size), int(box_size)]
        if not filament:
            distance = int(box_size / 2)  # use half the box_size
    if denoise:
        json_dict["model"]["filter"] = [
            CRYOLO_JANNI_MODEL, 24, 3, filtered_dir
        ]

    if DEBUG:
        print("Using following config: ", json_dict)

    with open(os.path.join(job_dir, "config_cryolo.json"), "w") as json_file:
        json.dump(json_dict, json_file, indent=4)

    # Reading the micrographs star file from Relion
    mictable = Table(fileName=in_mics, tableName='micrographs')
    mic_fns = mictable.getColumnValues("rlnMicrographName")

    # Launching cryolo
    args_dict = {
        '--conf': os.path.join(job_dir, "config_cryolo.json"),
        '--input': in_mics,
        '--output': os.path.join(job_dir, 'output'),
        '--weights': model,
        '--gpu': gpus.replace(',', ' '),
        '--threshold': thresh,
        '--distance': distance,
        '--cleanup': "",
        '--skip': "",
        '--write_empty': "",
        '--num_cpu': -1 if threads == 1 else threads
    }

    if filament:
        args_dict.update({
            '--filament': "",
            '--box_distance': box_dist,
            '--minimum_number_boxes': min_boxes,
            '--directional_method': 'PREDICTED'
        })
        # --distance is not passed in filament mode
        args_dict.pop('--distance')

    cmd = "%s && %s " % (CONDA_ENV, CRYOLO_PREDICT)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Moving output star files for Relion to use
    table_coords = Table(
        columns=['rlnMicrographName', 'rlnMicrographCoordinates'])
    star_dir = "EMAN_HELIX_SEGMENTED" if filament else "STAR"
    ext = ".box" if filament else ".star"
    with open(os.path.join(job_dir, "autopick.star"), "w") as mics_star:
        for mic in mic_fns:
            mic_base = os.path.basename(mic)
            mic_dir = os.path.dirname(mic)
            if len(mic_dir.split("/")) > 1 and "job" in mic_dir.split(
                    "/")[1]:  # remove JobType/jobXXX
                mic_dir = "/".join(mic_dir.split("/")[2:])
            os.makedirs(os.path.join(job_dir, mic_dir), exist_ok=True)
            coord_cryolo = os.path.splitext(mic_base)[0] + ext
            coord_cryolo = os.path.join(job_dir, "output", star_dir,
                                        coord_cryolo)
            coord_relion = os.path.splitext(mic_base)[0] + "_autopick" + ext
            coord_relion = os.path.join(job_dir, mic_dir, coord_relion)
            # Only micrographs with a coordinate file are listed
            if os.path.exists(coord_cryolo):
                os.rename(coord_cryolo, coord_relion)
                table_coords.addRow(mic, coord_relion)
                if DEBUG:
                    print("Moved %s to %s" % (coord_cryolo, coord_relion))
        table_coords.writeStar(mics_star, tableName='coordinate_files')

    # Required output to mini pipeline job_pipeline.star file
    pipeline_fn = os.path.join(job_dir, "job_pipeline.star")
    table_gen = Table(columns=['rlnPipeLineJobCounter'])
    table_gen.addRow(2)
    table_proc = Table(columns=[
        'rlnPipeLineProcessName', 'rlnPipeLineProcessAlias',
        'rlnPipeLineProcessTypeLabel', 'rlnPipeLineProcessStatusLabel'
    ])
    table_proc.addRow(job_dir, 'None', 'relion.external', 'Running')
    table_nodes = Table(
        columns=['rlnPipeLineNodeName', 'rlnPipeLineNodeTypeLabel'])
    table_nodes.addRow(in_mics, "MicrographsData.star.relion")
    table_nodes.addRow(os.path.join(job_dir, "autopick.star"),
                       "MicrographsCoords.star.relion.autopick")
    table_input = Table(
        columns=['rlnPipeLineEdgeFromNode', 'rlnPipeLineEdgeProcess'])
    table_input.addRow(in_mics, job_dir)
    table_output = Table(
        columns=['rlnPipeLineEdgeProcess', 'rlnPipeLineEdgeToNode'])
    table_output.addRow(job_dir, os.path.join(job_dir, "autopick.star"))

    with open(pipeline_fn, "w") as f:
        table_gen.writeStar(f, tableName="pipeline_general", singleRow=True)
        table_proc.writeStar(f, tableName="pipeline_processes")
        table_nodes.writeStar(f, tableName="pipeline_nodes")
        table_input.writeStar(f, tableName="pipeline_input_edges")
        table_output.writeStar(f, tableName="pipeline_output_edges")

    # Register output nodes in .Nodes/
    os.makedirs(os.path.join(".Nodes", "MicrographsCoords", job_dir),
                exist_ok=True)
    # An empty file is enough to register the node
    open(os.path.join(".Nodes", "MicrographsCoords", job_dir, "autopick.star"),
         "w").close()

    outputFn = os.path.join(job_dir, "output_for_relion.star")
    if not os.path.exists(outputFn):
        # get estimated box size
        summaryfn = os.path.join(job_dir, "output/DISTR",
                                 'size_distribution_summary*.txt')
        summary_files = glob(summaryfn)
        # Fail with a clear message instead of an IndexError
        if not summary_files:
            raise Exception("No crYOLO summary file found matching %s" %
                            summaryfn)
        estim_sizepx = None
        with open(summary_files[0]) as f:
            for line in f:
                if line.startswith("MEAN,"):
                    estim_sizepx = int(line.split(",")[-1])
                    break
        # Without a MEAN line there is no estimation to work with
        if estim_sizepx is None:
            raise Exception("No MEAN value found in %s" % summary_files[0])
        print("\ncrYOLO estimated box size %d px" % estim_sizepx)

        # calculate diameter, original (boxSize) and downsampled (boxSizeSmall) box
        optics = Table(fileName=in_mics, tableName='optics')
        angpix = float(optics[0].rlnMicrographPixelSize)

        if filament:
            # box size = 1.5x tube diam
            diam = 0.66 * box_size
        else:
            # use + 20% for diameter
            diam = math.ceil(estim_sizepx * angpix * 1.2)
            # use +30% for box size, make it even
            boxSize = 1.3 * estim_sizepx
            boxSize = math.ceil(boxSize / 2.) * 2

            # from relion_it.py script
            # Authors: Sjors H.W. Scheres, Takanori Nakane & Colin M. Palmer
            boxSizeSmall = None
            for box in (48, 64, 96, 128, 160, 192, 256, 288, 300, 320, 360,
                        384, 400, 420, 450, 480, 512, 640, 768, 896, 1024):
                # Don't go larger than the original box
                if box > boxSize:
                    boxSizeSmall = boxSize
                    break
                # If Nyquist freq. is better than 7.5 A, use this
                # downscaled box, otherwise continue to next size up
                small_box_angpix = angpix * boxSize / box
                if small_box_angpix < 3.75:
                    boxSizeSmall = box
                    break

            print(
                "\nSuggested parameters:\n\tDiameter (A): %d\n\tBox size (px): %d\n"
                "\tBox size binned (px): %d" % (diam, boxSize, boxSizeSmall))

            # output all params into a star file
            tableCryolo = Table(columns=[
                'rlnParticleDiameter', 'rlnOriginalImageSize', 'rlnImageSize'
            ])
            tableCryolo.addRow(diam, boxSize, boxSizeSmall)
            with open(outputFn, "w") as f:
                tableCryolo.writeStar(f, tableName='picker')

        # create .gui_manualpickjob.star for easy display
        starString = """
# version 30001

data_job

_rlnJobTypeLabel             relion.manualpick%s
_rlnJobIsContinue                       0
_rlnJobIsTomo                           0

# version 30001

data_joboptions_values

loop_
_rlnJobOptionVariable #1
_rlnJobOptionValue #2
    angpix         %f
 black_val          0
blue_value          0
color_label rlnParticleSelectZScore
  diameter         %d
  do_color         No
do_fom_threshold         No
  do_queue         No
do_startend        No
  fn_color         ""
     fn_in         ""
  highpass         -1
   lowpass         20
  micscale        0.2
min_dedicated       1
minimum_pick_fom          0
other_args         ""
      qsub       qsub
qsubscript /public/EM/RELION/relion/bin/relion_qsub.csh
 queuename    openmpi
 red_value          2
sigma_contrast      3
 white_val          0
"""
        label = ".helical" if filament else ""
        with open(".gui_manualpickjob.star", "w") as f:
            f.write(starString % (label, angpix, diam))

    end = time.time()
    diff = end - start
    print("Job duration = %dh %dmin %dsec \n" %
          (diff // 3600, diff // 60 % 60, diff % 60))
Beispiel #14
0
    def convertInputStep(self, resetDeps, copyAlignment):
        """ Create the input STAR files for the current level.

        - Level 0: the input star file and volumes are converted, then star
          files with random subsets of the particles are written for the run
          levels 9 down to 2. Each subset is sampled from the previous one.
        - Level 1: only the input star file and the volumes are converted.
        - Other levels: the 'outputData' of the previous level is split by
          rlnClassNumber into one 'input_star' file per class.

        Params:
            resetDeps: not used in the body of this method.
            copyAlignment: value passed on to self._convertStar().
        """
        import random

        if self._level == 0:
            makePath(self._getRunPath(self._level, 1))
            imgStar = self._getFileName('input_star', lev=self._level, rLev=0)
            self._convertStar(copyAlignment, imgStar)
            opticsTable = Table(fileName=imgStar, tableName='optics')
            partsTable = Table(fileName=imgStar, tableName='particles')
            self._convertVol(ImageHandler(), self.inputVolumes.get())
            mdSize = partsTable.size()

            for i in range(9, 1, -1):
                makePath(self._getRunPath(self._level, i))
                mStar = self._getFileName('input_star', lev=self._level, rLev=i)
                # 10000 particles per step for big sets, 10% of the set
                # per step otherwise
                size = 10000 * i if mdSize >= 100000 else int(mdSize * 0.1 * i)
                print("partsTable: ", size, i, mdSize)
                # The rows are replaced in place, so every subset is taken
                # from the one of the previous step
                partsTable._rows = random.sample(partsTable._rows, k=size)
                self.writeStar(mStar, partsTable, opticsTable)

        elif self._level == 1:
            imgStar = self._getFileName('input_star', lev=self._level, rLev=1)
            makePath(self._getRunPath(self._level, 1))
            self._convertStar(copyAlignment, imgStar)

            # find a clever way to avoid volume conversion if its already done.
            self._convertVol(ImageHandler(), self.inputVolumes.get())
        else:
            lastCls = None
            prevStar = self._getFileName('outputData', lev=self._level - 1)
            firstStarFn = self._getFileName('input_star', lev=1, rLev=1)
            # The optics table is always taken from the first input star
            opTable = Table(fileName=firstStarFn, tableName='optics')

            tableIn = Table(fileName=prevStar, tableName='particles')
            cols = [str(c) for c in tableIn.getColumnNames()]

            pTable = Table()
            for row in pTable.iterRows(prevStar, key="rlnClassNumber",
                                       tableName='particles'):
                clsPart = row.rlnClassNumber
                if clsPart != lastCls:
                    # The run folder of the new class is created only once
                    paths = self._getRunPath(self._level, clsPart)
                    makePath(paths)

                    # Flush the particles of the previous class
                    if lastCls is not None:
                        print("writing %s" % fn)
                        self.writeStar(fn, newPTable, opTable)
                    print("Path: %s and newRlev: %d" % (paths, clsPart))
                    lastCls = clsPart
                    newPTable = Table(columns=cols, tableName='particles')
                    fn = self._getFileName('input_star', lev=self._level,
                                           rLev=clsPart)
                newPTable.addRow(*row)
            # The table of the last class is still pending
            print("writing %s and ending the loop" % fn)
            self.writeStar(fn, newPTable, opTable)
Beispiel #15
0
def run_job(project_dir, args):
    """ Run crYOLO fine-tune training for a Relion external job.

    The function writes the crYOLO config file, links the micrographs of the
    input particles into IMG_FOLDER, writes one box file per micrograph into
    ANNOT_FOLDER, launches the training command and writes the
    'job_pipeline.star' file of the job.

    Params:
        project_dir: folder that the input and job paths are joined to.
        args: object with the attributes in_parts, out_dir, model and gpu.
    Raises:
        Exception: if the crYOLO command returns a non-zero code.
    """
    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    model = args.model or CRYOLO_GEN_MODEL
    gpus = args.gpu

    getPath = lambda *arglist: os.path.join(project_dir, *arglist)

    if SCRATCH_DIR is not None:
        filtered_dir = os.path.join(SCRATCH_DIR, "filtered_tmp")
    else:
        filtered_dir = "%s/filtered_tmp/" % job_dir

    # Create folder structure for cryolo
    os.mkdir(IMG_FOLDER)
    os.mkdir(ANNOT_FOLDER)

    # Reading the box size from relion
    optics = Table(fileName=getPath(in_parts), tableName='optics')[0]
    box_bin = int(optics.rlnImageSize)
    # Floor division of two float pixel sizes truncates non-integer binning
    # and can drop a whole unit because of rounding errors, so the real
    # ratio is used and the result is rounded to whole pixels.
    binning = (float(optics.rlnImagePixelSize) /
               float(optics.rlnMicrographOriginalPixelSize))
    box_size = int(round(binning * box_bin))
    print("Using unbinned box size of %d px" % box_size)

    # Making a cryolo config file
    json_dict = {
        "model": {
            "architecture": "PhosaurusNet",
            "input_size": 1024,
            "max_box_per_image": 600,
            "anchors": [box_size, box_size],
            "filter": [
                0.1,
                filtered_dir
            ]
        },
        "train": {
            "train_image_folder": IMG_FOLDER,
            "train_annot_folder": ANNOT_FOLDER,
            "train_times": 10,
            "batch_size": 6,
            "learning_rate": 0.0001,
            "nb_epoch": 200,
            "object_scale": 5.0,
            "no_object_scale": 1.0,
            "coord_scale": 1.0,
            "class_scale": 1.0,
            "pretrained_weights": "%s" % model,
            "saved_weights_name": getPath(job_dir, TUNE_MODEL),
            "debug": True
        },
        "valid": {
            "valid_image_folder": "",
            "valid_annot_folder": "",
            "valid_times": 1
        }
    }

    if DEBUG:
        print("Using following config: ", json_dict)

    with open("config_cryolo.json", "w") as json_file:
        json.dump(json_dict, json_file, indent=4)

    # Reading the particles from relion
    try:
        parttable = Table(fileName=getPath(in_parts), tableName='particles')
    except Exception:
        print("Could not read particles table from %s. Stopping" % in_parts)
        return
    mics_dict = {}

    # Arranging files for cryolo: making symlinks for mics and creating box files
    for row in parttable:
        mic = row.rlnMicrographName
        # The particle coordinate minus half a box is stored in the box file
        xCoord = int(int(row.rlnCoordinateX) - box_size / 2)
        yCoord = int(int(row.rlnCoordinateY) - box_size / 2)
        mics_dict.setdefault(mic, []).append((xCoord, yCoord))

    for mic, mic_coords in mics_dict.items():
        micSrc = getPath(mic)
        micDst = getPath(job_dir, IMG_FOLDER, os.path.basename(mic))
        if not os.path.exists(micDst):
            os.symlink(micSrc, micDst)
        if DEBUG:
            print("Link %s --> %s" % (micSrc, micDst))

        # The box file has the name of the link, but in the annotation folder
        box = os.path.splitext(micDst)[0] + ".box"
        box = box.replace(IMG_FOLDER, ANNOT_FOLDER)
        with open(box, "w") as f:
            for coords in mic_coords:
                f.write("%s\t%s\t%s\t%s\n" %
                        (coords[0], coords[1], box_size, box_size))
        if DEBUG:
            print("Created box file: %s" % box)

    # Launching cryolo
    args_dict = {
        '--conf': "config_cryolo.json",
        '--gpu': gpus.replace(',', ' '),
        '--warmup': 0,
        '--fine_tune': "",
        '--cleanup': ""
    }
    cmd = "%s && %s " % (CONDA_ENV, CRYOLO_TRAIN)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Required output job_pipeline.star file
    pipeline_fn = getPath(job_dir, "job_pipeline.star")
    table_gen = Table(columns=['rlnPipeLineJobCounter'])
    table_gen.addRow(2)
    table_proc = Table(columns=['rlnPipeLineProcessName', 'rlnPipeLineProcessAlias',
                                'rlnPipeLineProcessTypeLabel', 'rlnPipeLineProcessStatusLabel'])
    table_proc.addRow(job_dir, 'None', 'relion.external', 'Running')
    table_nodes = Table(columns=['rlnPipeLineNodeName', 'rlnPipeLineNodeTypeLabel'])
    table_nodes.addRow(in_parts, "ParticlesData.star.relion")
    table_input = Table(columns=['rlnPipeLineEdgeFromNode', 'rlnPipeLineEdgeProcess'])
    table_input.addRow(in_parts, job_dir)

    with open(pipeline_fn, "w") as f:
        table_gen.writeStar(f, tableName="pipeline_general", singleRow=True)
        table_proc.writeStar(f, tableName="pipeline_processes")
        table_nodes.writeStar(f, tableName="pipeline_nodes")
        table_input.writeStar(f, tableName="pipeline_input_edges")

    end = time.time()
    diff = end - start
    print("Job duration = %dh %dmin %dsec \n" % (diff // 3600, diff // 60 % 60, diff % 60))
def run_job(project_dir, args):
    """Run cryoassess on 2D class averages and keep particles of good classes.

    Steps performed:

    1. Derive the ``_model.star`` file name from ``args.in_parts`` and read
       its ``model_classes`` table to find the class-average stack and the
       number of classes.
    2. Launch the cryoassess 2D command through the shell.
    3. Collect the class numbers from ``output/Good/particle_<N>.jpg`` files
       found in the job directory.
    4. Write ``particles_for_training.star`` (optics table plus the particles
       whose ``rlnClassNumber`` is a good class) into the job directory and a
       ``backup_selection.star`` holding one ``rlnSelected`` flag per class.

    :param project_dir: directory against which relative paths are resolved.
    :param args: object exposing ``in_parts``, ``out_dir``, ``batch_size``
        and ``gpu`` attributes.
    :raises Exception: when the cryoassess command returns a non-zero code.
    :raises SystemExit: with code 1 when no good class was found (the RELION
        failure marker file is created first).
    """
    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    batch = args.batch_size
    gpu = args.gpu

    def getPath(*arglist):
        """Join the given path components onto the project directory."""
        return os.path.join(project_dir, *arglist)

    def printDuration():
        """Print the wall-clock time elapsed since the job started."""
        diff = time.time() - start
        print("Job duration = %dh %dmin %dsec \n" %
              (diff // 3600, diff // 60 % 60, diff % 60))

    # Reading the model star file from relion
    modelstar = in_parts.replace("_data.star", "_model.star")
    refstable = Table(fileName=getPath(modelstar), tableName='model_classes')
    # rlnReferenceImage has the form "<index>@<stack>": the stack path comes
    # from the first row, the class count from the index of the last row.
    refstack = refstable[0].rlnReferenceImage.split("@")[-1]
    nrCls = int(refstable[-1].rlnReferenceImage.split("@")[0])

    if DEBUG:
        print("Found input class averages stack: %s" % refstack)

    # Launching cryoassess
    args_dict = {
        '-i': getPath(refstack),
        '-o': getPath(job_dir, 'output'),
        '-b': batch,
        '-m': CRYOASSESS_2D_MODEL,
    }
    # NOTE(review): the command is a shell string (CONDA_ENV is chained with
    # "&&"), so paths containing spaces or shell metacharacters are not
    # escaped here.
    cmd = "%s && CUDA_VISIBLE_DEVICES=%s %s " % (CONDA_ENV, gpu, CRYOASSESS_2D)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Parse output to get good classes IDs
    goodTemplate = getPath(job_dir, "output/Good/particle_*.jpg")
    # Raw string avoids invalid-escape warnings; "\d+" requires at least one
    # digit so that int() below can never receive an empty string.
    regex = re.compile(r'particle_(\d+)\.jpg')
    goodcls = []
    for fn in glob(goodTemplate):
        match = regex.search(fn)
        # The glob is broader than the regex (e.g. "particle_x.jpg"):
        # ignore files that do not carry a class number.
        if match is not None:
            goodcls.append(int(match.group(1)))

    if DEBUG:
        print("Parsing output files: %s\nGood classes: %s" %
              (goodTemplate, goodcls))

    if not goodcls:
        print("No good classes found. Job stopped.")
        printDuration()
        open(RELION_JOB_FAILURE_FILENAME, "w").close()
        # Same effect as exit(1) but does not depend on the `site` builtins.
        raise SystemExit(1)

    # Set gives constant-time membership tests in the loops below
    goodset = set(goodcls)

    # Create output star file for Relion to use
    optics = Table(fileName=getPath(in_parts), tableName='optics')
    ptcls = Table(fileName=getPath(in_parts), tableName='particles')
    cols = ptcls.getColumnNames()
    out_ptcls = Table(columns=cols)

    for row in ptcls:
        if row.rlnClassNumber in goodset:
            out_ptcls.addRow(*row)

    if DEBUG:
        print("Input particles: %d\nOutput particles: %d" %
              (len(ptcls), len(out_ptcls)))

    out_star = getPath(job_dir, "particles_for_training.star")
    with open(out_star, "w") as f:
        optics.writeStar(f, tableName="optics")
        out_ptcls.writeStar(f, tableName="particles")

    # Create backup_selection.star for results visualization
    # NOTE(review): this file is written under project_dir, not job_dir -
    # confirm that this is the intended location.
    sel = Table(columns=['rlnSelected'])
    for i in range(1, nrCls + 1):
        sel.addRow(1 if i in goodset else 0)
    with open(getPath("backup_selection.star"), "w") as f:
        sel.writeStar(f, tableName="")

    printDuration()
    def convertInputStep(self, movId, partId, postId):
        """ Write the star files needed as input by the protocol.

        One star file is written per movie (general info, global shifts and,
        when available, local motion model coefficients), together with the
        micrographs star file and the particles star file.

        The three id arguments are not read in the body; presumably they only
        make the step depend on its inputs (to be confirmed with the caller).
        """
        movieSet = self.inputMovies.get()
        partSet = self.inputParticles.get()
        partStar = self._getFileName('input_particles')
        partsDir = self._getInputPath('particles')
        pwutils.makePath(partsDir)

        self.info("Converting set from '%s' into '%s'" %
                  (partSet.getFileName(), partStar))

        generalTable = Table(columns=[
            'rlnImageSizeX',
            'rlnImageSizeY',
            'rlnImageSizeZ',
            'rlnMicrographMovieName',
            'rlnMicrographBinning',
            'rlnMicrographOriginalPixelSize',
            'rlnMicrographDoseRate',
            'rlnMicrographPreExposure',
            'rlnVoltage',
            'rlnMicrographStartFrame',
            'rlnMotionModelVersion',
            'rlnMicrographGainName',
            'rlnMicrographDefectFile',
        ])
        shiftTable = Table(columns=['rlnMicrographFrameNumber',
                                    'rlnMicrographShiftX',
                                    'rlnMicrographShiftY'])
        coeffTable = Table(columns=['rlnMotionModelCoeffsIdx',
                                    'rlnMotionModelCoeff'])

        # Values shared by every movie: they go into a template row whose
        # movie-specific fields are overridden inside the loop below.
        xdim, ydim, ndim = movieSet.getDim()
        acq = movieSet.getAcquisition()
        firstFrame, _ = movieSet.getFirstItem().getAlignment().getRange()
        moviePixSize = movieSet.getSamplingRate()
        binning = partSet.getSamplingRate() / moviePixSize

        og = convert.OpticsGroups.fromImages(movieSet)
        micWriter = convert.createWriter(optics=og)
        micWriter.writeSetOfMicrographs(movieSet,
                                        self._getFileName('input_mics'),
                                        postprocessImageRow=self._updateMic)

        generalTable.addRow(xdim, ydim, ndim, 'movieName',
                            binning, moviePixSize,
                            acq.getDosePerFrame(), acq.getDoseInitial(),
                            acq.getVoltage(), firstFrame, 0, '""', '""')
        templateRow = generalTable[0]

        # Placeholder shift written for frames outside the aligned range
        unsetShift = -9999.000

        for movie in movieSet:
            starFn = self._getMovieStar(movie)
            opticsGroup = og[movie.getAttributeValue('_rlnOpticsGroup', 1)]
            gainFn = opticsGroup.get('rlnMicrographGainName', None)
            defectFn = opticsGroup.get('rlnMicrographDefectFile', None)

            with open(starFn, 'w') as starFile:
                coeffs = json.loads(
                    movie.getAttributeValue('_rlnMotionModelCoeff', '[]'))

                # --- general table: always start from the template row ---
                overrides = {
                    'rlnMicrographMovieName': movie.getFileName(),
                    # version 1 when coefficients are stored, 0 otherwise
                    'rlnMotionModelVersion': 1 if coeffs else 0,
                }
                if gainFn:
                    overrides['rlnMicrographGainName'] = gainFn
                if defectFn:
                    overrides['rlnMicrographDefectFile'] = defectFn

                generalTable[0] = templateRow._replace(**overrides)
                generalTable.writeStar(starFile, tableName='general',
                                       singleRow=True)

                # --- global shifts: one row per frame from 1 to ndim ---
                shiftTable.clearRows()
                alignment = movie.getAlignment()
                shiftsX, shiftsY = alignment.getShifts()
                frame0, frameN = alignment.getRange()

                for frame in range(1, frame0):
                    shiftTable.addRow(frame, unsetShift, unsetShift)

                # Make the shifts relative to the first aligned frame by
                # adding the opposite of its shift to every value
                offsetX = -shiftsX[0]
                offsetY = -shiftsY[0]
                for pos, frame in enumerate(range(frame0, frameN + 1)):
                    shiftTable.addRow(frame,
                                      shiftsX[pos] + offsetX,
                                      shiftsY[pos] + offsetY)

                for frame in range(frameN + 1, ndim + 1):
                    shiftTable.addRow(frame, unsetShift, unsetShift)

                shiftTable.writeStar(starFile, tableName='global_shift')

                # --- local motion model: written only if coefficients exist
                coeffTable.clearRows()
                if coeffs:
                    for idx, value in enumerate(coeffs):
                        coeffTable.addRow(idx, value)
                    coeffTable.writeStar(starFile,
                                         tableName='local_motion_model')

        convert.writeSetOfParticles(partSet,
                                    partStar,
                                    outputDir=partsDir,
                                    alignType=ALIGN_PROJ,
                                    fillMagnification=True)
def run_job(project_dir, args):
    """Run cinderella on 2D class averages and keep particles of good classes.

    Steps performed:

    1. Pick the model: ``CINDERELLA_GEN_MODEL`` when ``args.model`` is the
       string ``"None"``, otherwise ``args.model`` resolved against
       ``project_dir``.
    2. Derive the ``_model.star`` file name from ``args.in_parts`` and read
       its ``model_classes`` table to find the class-average stack and the
       number of classes.
    3. Launch the cinderella prediction command through the shell.
    4. Read ``<stack>_index_confidence.txt`` from the job ``output`` folder
       and collect the classes whose confidence is above the threshold.
    5. Write ``particles_for_training.star`` (optics table plus the particles
       whose ``rlnClassNumber`` is a good class) into the job directory and a
       ``backup_selection.star`` holding one ``rlnSelected`` flag per class.

    :param project_dir: directory against which relative paths are resolved.
    :param args: object exposing ``in_parts``, ``out_dir``, ``threshold``,
        ``model`` and ``gpu`` attributes.
    :raises Exception: when the cinderella command returns a non-zero code.
    :raises SystemExit: with code 1 when no good class was found (the RELION
        failure marker file is created first).
    """
    start = time.time()
    in_parts = args.in_parts
    job_dir = args.out_dir
    thresh = args.threshold
    model = args.model
    gpus = args.gpu

    def getPath(*arglist):
        """Join the given path components onto the project directory."""
        return os.path.join(project_dir, *arglist)

    def printDuration():
        """Print the wall-clock time elapsed since the job started."""
        diff = time.time() - start
        print("Job duration = %dh %dmin %dsec \n" %
              (diff // 3600, diff // 60 % 60, diff % 60))

    if model == "None":
        model = CINDERELLA_GEN_MODEL
    else:
        model = getPath(model)

    # Reading the model star file from relion
    modelstar = in_parts.replace("_data.star", "_model.star")
    refstable = Table(fileName=getPath(modelstar), tableName='model_classes')
    # rlnReferenceImage has the form "<index>@<stack>": the stack path comes
    # from the first row, the class count from the index of the last row.
    refstack = refstable[0].rlnReferenceImage.split("@")[-1]
    nrCls = int(refstable[-1].rlnReferenceImage.split("@")[0])

    if DEBUG:
        print("Found input class averages stack: %s" % refstack)

    # Launching cinderella
    # NOTE(review): '-o' is a relative path while the results are read from
    # project_dir/job_dir/output below - presumably the caller has changed
    # the working directory to the job directory; confirm.
    args_dict = {
        '-i': getPath(refstack),
        '-o': 'output',
        '-w': model,
        '--gpu': gpus,
        '-t': thresh,
    }
    # NOTE(review): the command is a shell string (CONDA_ENV is chained with
    # "&&"), so paths containing spaces or shell metacharacters are not
    # escaped here.
    cmd = "%s && %s " % (CONDA_ENV, CINDERELLA_PREDICT)
    cmd += " ".join(['%s %s' % (k, v) for k, v in args_dict.items()])

    print("Running command:\n{}".format(cmd))
    proc = subprocess.Popen(cmd, shell=True)
    proc.communicate()

    if proc.returncode:
        raise Exception("Command failed with return code %d" % proc.returncode)

    # Parse output to get good classes IDs
    outfn = os.path.basename(refstack.replace(".mrcs", "_index_confidence.txt"))
    outpath = getPath(job_dir, "output", outfn)
    goodcls = []
    with open(outpath, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines instead of failing with IndexError
                continue
            if float(fields[1]) > thresh:
                # The index from the file is shifted by one to get the class
                # number (presumably the file uses 0-based indices - confirm)
                goodcls.append(int(fields[0]) + 1)
            else:
                # Stops at the first entry not above the threshold; this
                # assumes the file is sorted by decreasing confidence
                # (TODO confirm against the cinderella output format).
                break

    if DEBUG:
        print("Parsing output file: %s\nGood classes: %s" % (outpath, goodcls))

    if not goodcls:
        print("No good classes found. Job stopped.")
        printDuration()
        open(RELION_JOB_FAILURE_FILENAME, "w").close()
        # Same effect as exit(1) but does not depend on the `site` builtins.
        raise SystemExit(1)

    # Set gives constant-time membership tests in the loops below
    goodset = set(goodcls)

    # Create output star file for Relion to use
    optics = Table(fileName=getPath(in_parts), tableName='optics')
    ptcls = Table(fileName=getPath(in_parts), tableName='particles')
    cols = ptcls.getColumnNames()
    out_ptcls = Table(columns=cols)

    for row in ptcls:
        if row.rlnClassNumber in goodset:
            out_ptcls.addRow(*row)

    if DEBUG:
        print("Input particles: %d\nOutput particles: %d" %
              (len(ptcls), len(out_ptcls)))

    out_star = getPath(job_dir, "particles_for_training.star")
    with open(out_star, "w") as f:
        optics.writeStar(f, tableName="optics")
        out_ptcls.writeStar(f, tableName="particles")

    # Create backup_selection.star for results visualization
    # NOTE(review): this file is written under project_dir, not job_dir -
    # confirm that this is the intended location.
    sel = Table(columns=['rlnSelected'])
    for i in range(1, nrCls + 1):
        sel.addRow(1 if i in goodset else 0)
    with open(getPath("backup_selection.star"), "w") as f:
        sel.writeStar(f, tableName="")

    printDuration()