Example #1
0
def get_mtz_info(datafile):
    """
    Get unit cell and SG from input mtz
    """

    sg = False
    cell = False
    vol = False

    # Convert from unicode
    datafile = convert_unicode(datafile)

    # Read datafile
    data = iotbx_mtz.object(datafile)

    # Derive space group from datafile
    sg = fix_R3_sg(data.space_group_name().replace(" ", ""))

    # Wrangle the cell parameters
    cell = [round(x, 3) for x in data.crystals()[0].unit_cell_parameters()]

    # The volume
    vol = data.crystals()[0].unit_cell().volume()

    return (sg, cell, vol)
Example #2
0
def get_mtz_info(data_file):
    """
    Get unit cell and SG from input mtz
    """

    sg = False
    cell = False
    vol = False

    # Convert from unicode
    data_file = convert_unicode(data_file)

    # Read data_file
    data = iotbx_mtz.object(data_file)

    # Derive space group from data_file
    sg = fix_R3_sg(data.space_group_name().replace(" ", ""))

    # Wrangle the cell parameters
    cell = [round(x, 3) for x in data.crystals()[0].unit_cell_parameters()]

    # The volume
    vol = data.crystals()[0].unit_cell().volume()

    return (sg, cell, vol)
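Both variants above return a (spacegroup, cell, volume) tuple read via iotbx. A minimal usage sketch, assuming the surrounding module already imports iotbx_mtz and defines convert_unicode and fix_R3_sg as in the examples; the MTZ path is a placeholder:

# Hypothetical caller; the MTZ path is a placeholder.
sg, cell, vol = get_mtz_info("/tmp/reflections.mtz")
print "SG: %s  cell: %s  volume: %.1f A^3" % (sg, cell, vol)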
Example #3
0
def get_res(data_file):
    """Return resolution limit of dataset"""

    data_file = convert_unicode(data_file)
    data = iotbx_mtz.object(data_file)

    return float(data.max_min_resolution()[-1])
Example #4
0
def get_res(datafile):
    """Return resolution limit of dataset"""

    datafile = convert_unicode(datafile)
    data = iotbx_mtz.object(datafile)

    return float(data.max_min_resolution()[-1])
Example #5
0
def get_spacegroup_info(cif_file):
    """Get info from PDB of mmCIF file"""

    # print "get_spacegroup_info", cif_file, os.getcwd()

    cif_file = convert_unicode(cif_file)

    if cif_file[-3:].lower() == "cif":
        fail = False
        cif_spacegroup = False

        try:
            input_file = open(cif_file, "r").read(20480)
            for line in input_file.split('\n'):
                if "_symmetry.space_group_name_H-M" in line:
                    cif_spacegroup = line[32:].strip()[1:-1].upper().replace(" ", "")
                if "_pdbx_database_status.pdb_format_compatible" in line:
                    if line.split()[1] == "N":
                        fail = True
        except IOError:
            return False
        if fail:
            return False
        else:
            return cif_spacegroup
    else:
        return str(iotbx_pdb.input(cif_file).crystal_symmetry().space_group_info()).upper().replace(" ", "")
Example #6
0
def get_spacegroup_info(struct_file):
    """Get info from PDB of mmCIF file"""

    # print "get_spacegroup_info", struct_file, os.getcwd()

    struct_file = convert_unicode(struct_file)

    if struct_file[-3:].lower() == "cif":
        fail = False
        cif_spacegroup = False

        try:
            input_file = open(struct_file, "r").read(20480)
            for line in input_file.split('\n'):
                if "_symmetry.space_group_name_H-M" in line:
                    cif_spacegroup = line[32:].strip()[1:-1].upper().replace(
                        " ", "")
                    # print cif_spacegroup
                if "_pdbx_database_status.pdb_format_compatible" in line:
                    if line.split()[1] == "N":
                        fail = True
        except IOError:
            return False
        if fail:
            return False
        else:
            return cif_spacegroup
    else:
        return str(
            iotbx_pdb.input(struct_file).crystal_symmetry().space_group_info()
        ).upper().replace(" ", "")
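A short usage sketch for get_spacegroup_info, assuming convert_unicode and iotbx_pdb are available as in the examples; the file names are placeholders. Note that the function returns False both for unreadable CIFs and for entries flagged as not PDB-format compatible:

# Hypothetical caller; file names are placeholders.
sg_cif = get_spacegroup_info("/tmp/model.cif")  # parsed from _symmetry.space_group_name_H-M
sg_pdb = get_spacegroup_info("/tmp/model.pdb")  # parsed via iotbx_pdb crystal symmetry
if not sg_cif:
    print "Could not determine space group from the mmCIF file"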
Example #7
0
 def wrapper(**kwargs):
     os.chdir(kwargs.get('work_dir', os.getcwd()))
     if not kwargs.get('script', False):
         # Pop out the launcher
         launcher = kwargs.pop('launcher', None)
         # Pop out the batch_queue
         batch_queue = kwargs.pop('batch_queue', None)
         # Pop out the results_queue
         result_queue = kwargs.pop('result_queue', None)
         # Create a unique identifier for Phaser results
         #kwargs['output_id'] = 'Phaser_%d' % random.randint(0, 10000)
         # Grab the RAPD python path (if available)
         rapd_python = kwargs.pop('rapd_python', 'rapd.python')
         # Signal to launch run
         kwargs['script'] = True
         if kwargs.get('pool', False):
             # If running on local machine. Launcher will be 'utils.processes.local_subprocess'
             pool = kwargs.pop('pool')
             f = write_script(kwargs)
             proc = pool.apply_async(
                 launcher,
                 kwds={
                     "command":
                     "%s %s" % (rapd_python, f),
                     "logfile":
                     os.path.join(convert_unicode(kwargs.get('work_dir')),
                                  'rapd_phaser.log'),
                     "result_queue":
                     result_queue,
                 })
             #return (proc, 'junk', kwargs['output_id'])
             return (proc, 'junk')
         else:
             # If running on computer cluster. Launcher will be sites.cluster.(site_name).process_cluster
             f = write_script(kwargs)
             pid_queue = Queue()
             proc = Process(target=launcher,
                            kwargs={
                                "command":
                                "%s %s" % (rapd_python, f),
                                "logfile":
                                os.path.join(kwargs.get('work_dir'),
                                             'rapd_phaser.log'),
                                "pid_queue":
                                pid_queue,
                                "batch_queue":
                                batch_queue,
                            })
             proc.start()
             #return (proc, pid_queue.get(), kwargs['output_id'])
             return (proc, pid_queue.get())
     else:
         # Remove extra input params used to setup job
         l = ['script', 'test']
         for k in l:
             # pop WON'T error out if key not found!
             _ = kwargs.pop(k, None)
         # Just launch job
         return func(**kwargs)
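The wrapper above is the body of a decorator: on the first pass it writes a job script and hands it to a launcher (a local multiprocessing pool or a cluster adapter); on the second pass, with script=True, it strips the setup-only keys and calls the wrapped function directly. A minimal, self-contained sketch of that two-pass pattern, with the script writing and launcher dispatch omitted (the decorator name script_launcher is illustrative, not from the source):

import functools
import os

def script_launcher(func):
    """Illustrative stand-in for the decorator whose wrapper() is shown above."""
    @functools.wraps(func)
    def wrapper(**kwargs):
        os.chdir(kwargs.get('work_dir', os.getcwd()))
        if not kwargs.get('script', False):
            # First pass: the real wrapper writes a script and dispatches it to a
            # launcher/pool here; this sketch just flags the job and re-enters.
            kwargs['script'] = True
            return wrapper(**kwargs)
        # Second pass: remove setup-only keys and run the wrapped function.
        for key in ('script', 'test'):
            kwargs.pop(key, None)
        return func(**kwargs)
    return wrapper

@script_launcher
def example_job(**kwargs):
    return "ran in %s" % os.getcwd()

print example_job(work_dir="/tmp")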
Example #8
0
 def run_ellg():
     res0 = 0.0
     i0 = phaser.InputMR_ELLG()
     i0.setSPAC_HALL(r.getSpaceGroupHall())
     i0.setCELL6(r.getUnitCell())
     i0.setMUTE(True)
     i0.setREFL_DATA(r.getDATA())
     if f[-3:] in ('cif'):
         i0.addENSE_CIT_ID('model', convert_unicode(f), 0.7)
     else:
         i0.addENSE_PDB_ID("model", convert_unicode(f), 0.7)
     # i.addSEAR_ENSE_NUM("junk",5)
     r1 = phaser.runMR_ELLG(i0)
     #print r1.logfile()
     if r1.Success():
         res0 = r1.get_target_resolution('model')
     del (r1)
     return res0
Example #9
0
 def run_ellg():
     new_res = 0.0
     i0 = phaser.InputMR_ELLG()
     i0.setSPAC_HALL(r.getSpaceGroupHall())
     i0.setCELL6(r.getUnitCell())
     i0.setMUTE(True)
     i0.setREFL_DATA(r.getDATA())
     if mmcif[-3:] in ('cif'):
         i0.addENSE_CIT_ID('model', convert_unicode(mmcif), 0.7)
     else:
         i0.addENSE_PDB_ID("model", convert_unicode(mmcif), 0.7)
     r1 = phaser.runMR_ELLG(i0)
     #print r1.logfile()
     if r1.Success():
         # If it worked use the recommended resolution
         new_res = round(r1.get_target_resolution('model'), 1)
     del(r1)
     return new_res
Example #10
0
 def run_ellg():
     res0 = 0.0
     i0 = phaser.InputMR_ELLG()
     i0.setSPAC_HALL(r.getSpaceGroupHall())
     i0.setCELL6(r.getUnitCell())
     i0.setMUTE(True)
     i0.setREFL_DATA(r.getDATA())
     if f[-3:] in ('cif'):
         i0.addENSE_CIT_ID('model', convert_unicode(f), 0.7)
     else:
         i0.addENSE_PDB_ID("model", convert_unicode(f), 0.7)
     # i.addSEAR_ENSE_NUM("junk",5)
     r1 = phaser.runMR_ELLG(i0)
     #print r1.logfile()
     if r1.Success():
         res0 = r1.get_target_resolution('model')
     del(r1)
     return res0
Example #11
0
    def run_ellg():
        new_res = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        #  Read in CIF file
        if struct_file[-3:] in ('cif', ):
            i0.addENSE_CIT_ID("model", convert_unicode(struct_file), 0.7)
        # Read in PDB file
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(struct_file), 0.7)
        try:
            r1 = phaser.runMR_ELLG(i0)
        except RuntimeError as e:
            # print "Hit error"
            # Known CIF error - convert to pdb and retry
            if struct_file[-3:] in ('cif', ):
                # print "Convert to pdb"
                pdb.cif_as_pdb((struct_file, ))
                pdb_file = struct_file.replace(".cif", ".pdb")
                i1 = phaser.InputMR_ELLG()
                i1.setSPAC_HALL(r.getSpaceGroupHall())
                i1.setCELL6(r.getUnitCell())
                i1.setMUTE(True)
                i1.setREFL_DATA(r.getDATA())
                i1.addENSE_PDB_ID("model", convert_unicode(pdb_file), 0.7)
                r1 = phaser.runMR_ELLG(i1)
            else:
                raise e

        # print r1.logfile()
        if r1.Success():
            # If it worked use the recommended resolution
            new_res = round(r1.get_target_resolution('model'), 1)
        del (r1)
        return new_res
Example #12
0
    def __init__(self, command, output=None, tprint=False, logger=None):

        # If the logging instance is passed in...
        if logger:
            self.logger = logger
        else:
            # Otherwise get the logger Instance
            self.logger = logging.getLogger("RAPDLogger")
            self.logger.debug("__init__")

        # Store tprint for use throughout
        if tprint:
            self.tprint = tprint
        # Dead end if no tprint passed
        else:

            def func(arg=False, level=False, verbosity=False, color=False):
                pass

            self.tprint = func

        # Stopwatch
        self.start_time = time.time()

        # Store inputs
        self.input = command
        self.output = output

        # pprint(command)

        # Params
        self.working_dir = self.input["directories"].get("work", os.getcwd())
        self.test = self.input["preferences"].get("test", False)
        self.sample_type = self.input["preferences"].get("type", "protein")
        self.solvent_content = self.input["preferences"].get(
            "solvent_content", 0.55)
        self.cluster_use = self.input["preferences"].get("cluster", False)
        self.clean = self.input["preferences"].get("clean", True)
        self.gui = self.input["preferences"].get("gui", True)
        # self.controller_address = self.input[0].get("control", False)
        self.verbose = self.input["preferences"].get("verbose", False)
        self.datafile = xutils.convert_unicode(
            self.input["input_data"].get("datafile"))

        Process.__init__(self, name="PDBQuery")
        self.start()
Example #13
0
    def __init__(self, command, output=None, tprint=False, logger=None):

        # If the logging instance is passed in...
        if logger:
            self.logger = logger
        else:
            # Otherwise get the logger Instance
            self.logger = logging.getLogger("RAPDLogger")
            self.logger.debug("__init__")

        # Store tprint for use throughout
        if tprint:
            self.tprint = tprint
        # Dead end if no tprint passed
        else:
            def func(arg=False, level=False, verbosity=False, color=False):
                pass
            self.tprint = func

        # Stopwatch
        self.start_time = time.time()

        # Store inputs
        self.input = command
        self.output = output

        # pprint(command)

        # Params
        self.working_dir = self.input["directories"].get("work", os.getcwd())
        self.test = self.input["preferences"].get("test", False)
        self.sample_type = self.input["preferences"].get("type", "protein")
        self.solvent_content = self.input["preferences"].get("solvent_content", 0.55)
        self.cluster_use = self.input["preferences"].get("cluster", False)
        self.clean = self.input["preferences"].get("clean", True)
        self.gui = self.input["preferences"].get("gui", True)
        # self.controller_address = self.input[0].get("control", False)
        self.verbose = self.input["preferences"].get("verbose", False)
        self.datafile = xutils.convert_unicode(self.input["input_data"].get("datafile"))

        Process.__init__(self, name="PDBQuery")
        self.start()
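The PDBQuery __init__ above (both variants) pulls everything from a nested command dict. A hedged sketch of the shape it expects, inferred only from the keys accessed in the code; all values are placeholders:

# Keys inferred from the attribute lookups above; values are placeholders.
command = {
    "directories": {"work": "/tmp/pdbquery"},
    "preferences": {
        "test": False,
        "type": "protein",
        "solvent_content": 0.55,
        "cluster": False,
        "clean": True,
        "gui": True,
        "verbose": False,
    },
    "input_data": {"datafile": "/tmp/data.mtz"},
}
# PDBQuery(command) would store these and immediately call self.start()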
Example #14
0
 def wrapper(**kwargs):
     os.chdir(kwargs.get('work_dir', os.getcwd()))
     if not kwargs.get('script', False):
         # Pop out the launcher
         launcher = kwargs.pop('launcher', None)
         # Pop out the batch_queue
         batch_queue = kwargs.pop('batch_queue', None)
         # Create a unique identifier for Phaser results
         kwargs['output_id'] = 'Phaser_%d' % random.randint(0, 10000)
         # Signal to launch run
         kwargs['script'] = True
         if kwargs.get('pool', False):
             # If running on local machine
             pool = kwargs.pop('pool')
             f = write_script(kwargs)
             new_kwargs = {"command": "rapd2.python %s" % f,
                           "logfile": os.path.join(convert_unicode(kwargs.get('work_dir')), 'rapd_phaser.log'),
                           }
             proc = pool.apply_async(launcher, kwds=new_kwargs,)
             return (proc, 'junk', kwargs['output_id'])
         else:
             # If running on computer cluster
             f = write_script(kwargs)
             pid_queue = Queue()
             proc = Process(target=launcher,
                            kwargs={"command": "rapd2.python %s" % f,
                                    "pid_queue": pid_queue,
                                    "batch_queue": batch_queue,
                                    "logfile": os.path.join(kwargs.get('work_dir'), 'rapd_phaser.log'),
                                    })
             proc.start()
             return (proc, pid_queue.get(), kwargs['output_id'])
     else:
         # Remove extra input params used to setup job
         l = ['script', 'test']
         for k in l:
             # pop WON'T error out if key not found!
             _ = kwargs.pop(k, None)
         # Just launch job
         return func(**kwargs)
Example #15
0
def run_phaser(
    data_file,
    struct_file,
    spacegroup,
    db_settings=False,
    tag=False,
    work_dir=False,
    adf=False,
    name=False,
    ncopy=1,
    cell_analysis=False,
    resolution=False,
    full=False,
):
    """
    Run Phaser and pass results back to the RAPD Redis DB
    **Requires Phaser src code!**

    data_file - input data as mtz (required)
    struct_file - input search model path in mmCIF or PDB format (required)
    spacegroup - The space group to run MR (required)

    tag - a Redis key where the results are sent (cluster mode)
    db_settings - Redis connection settings for sending results (cluster mode)
    work_dir - working directory (defaults to current working dir)
    name - root name for output files (defaults to spacegroup)
    ncopy - number of molecules to search for
    cell_analysis - internal RAPD signal so all possible SG's are searched
    resolution - high res limit to run MR (float)
    full - signal to run more comprehensive MR
    """

    phaser_log = False
    # Change to work_dir
    if not work_dir:
        work_dir = os.getcwd()
    os.chdir(work_dir)

    if not name:
        name = spacegroup

    # # Handle CIF file input -> PDB
    # if struct_file[-3:] == "cif":
    #     pdb.cif_as_pdb(struct_file)
    #     struct_file = struct_file.replace(".cif", ".pdb")

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r1 = phaser.runMR_DAT(i)
    # Need to determine Phaser version for keyword changes!
    version = re.search(r'Version:\s*([\d.]+)', r1.logfile()).group(1)

    if r1.Success():
        i = phaser.InputMR_AUTO()
        # i.setREFL_DATA(r1.getREFL_DATA())
        # i.setREFL_DATA(r1.DATA_REFL())
        i.setREFL_F_SIGF(r1.getMiller(), r1.getFobs(), r1.getSigFobs())
        i.setCELL6(r1.getUnitCell())
        if struct_file[-3:].lower() == "cif":
            #i.addENSE_CIF_ID('model', cif, 0.7)
            ### Typo in PHASER CODE!!! <<<CIT>>> ###
            i.addENSE_CIT_ID('model', convert_unicode(struct_file), 0.7)
        else:
            i.addENSE_PDB_ID('model', convert_unicode(struct_file), 0.7)
        i.addSEAR_ENSE_NUM("model", ncopy)
        i.setSPAC_NAME(spacegroup)
        if cell_analysis:
            i.setSGAL_SELE("ALL")
            # Set it for worst case in orth
            # number of processes to run in parallel where possible
            i.setJOBS(1)
        else:
            i.setSGAL_SELE("NONE")
        if full:
            # Picks own resolution
            # Round 2: pick the best solution as long as there are less than 10% clashes
            i.setPACK_SELE("PERCENT")
            i.setPACK_CUTO(0.1)
            #command += "PACK CUTOFF 10\n"
        else:
            # For first round and cell analysis
            # Only set the resolution limit in the first round or cell analysis.
            if resolution:
                i.setRESO_HIGH(resolution)
            else:
                i.setRESO_HIGH(6.0)
            # If Phaser version is 2.6 or older
            if int(version.split('.')[1]) <= 6:
                i.setSEAR_DEEP(False)
            else:
                i.setSEAR_METH("FAST")

            # Don"t seem to work since it picks the high res limit now.
            # Get an error when it prunes all the solutions away and TF has no input.
            # command += "PEAKS ROT SELECT SIGMA CUTOFF 4.0\n"
            # command += "PEAKS TRA SELECT SIGMA CUTOFF 6.0\n"
        # Turn off pruning in 2.6.0
        i.setSEAR_PRUN(False)
        # Choose more top peaks to help with getting it correct.
        i.setPURG_ROTA_ENAB(True)
        i.setPURG_ROTA_NUMB(3)
        #command += "PURGE ROT ENABLE ON\nPURGE ROT NUMBER 3\n"
        i.setPURG_TRAN_ENAB(True)
        i.setPURG_TRAN_NUMB(1)
        #command += "PURGE TRA ENABLE ON\nPURGE TRA NUMBER 1\n"

        # Only keep the top after refinement.
        i.setPURG_RNP_ENAB(True)
        i.setPURG_RNP_NUMB(1)
        #command += "PURGE RNP ENABLE ON\nPURGE RNP NUMBER 1\n"
        i.setROOT(convert_unicode(name))
        # i.setMUTE(False)
        i.setMUTE(True)
        # Delete the setup results
        # del(r)
        # launch the run
        # r = phaser.runMR_AUTO(i)

        try:
            r = phaser.runMR_AUTO(i)
        except RuntimeError as e:
            # print "Hit error"
            # Known CIF error - convert to pdb and retry
            if struct_file[-3:] in ('cif', ):
                # print "Convert to pdb"
                pdb.cif_as_pdb((struct_file, ))
                pdb_file = struct_file.replace(".cif", ".pdb")

                i = phaser.InputMR_AUTO()
                # i.setREFL_DATA(r1.getREFL_DATA())
                # i.setREFL_DATA(r1.DATA_REFL())
                i.setREFL_F_SIGF(r1.getMiller(), r1.getFobs(), r1.getSigFobs())
                i.setCELL6(r1.getUnitCell())
                i.addENSE_PDB_ID('model', convert_unicode(pdb_file), 0.7)
                i.addSEAR_ENSE_NUM("model", ncopy)
                i.setSPAC_NAME(spacegroup)
                if cell_analysis:
                    i.setSGAL_SELE("ALL")
                    # Set it for worst case in orth
                    # number of processes to run in parallel where possible
                    i.setJOBS(1)
                else:
                    i.setSGAL_SELE("NONE")
                if full:
                    # Picks own resolution
                    # Round 2: pick the best solution as long as there are less than 10% clashes
                    i.setPACK_SELE("PERCENT")
                    i.setPACK_CUTO(0.1)
                    #command += "PACK CUTOFF 10\n"
                else:
                    # For first round and cell analysis
                    # Only set the resolution limit in the first round or cell analysis.
                    if resolution:
                        i.setRESO_HIGH(resolution)
                    else:
                        i.setRESO_HIGH(6.0)
                    # If Phaser version is 2.6 or older
                    if int(version.split('.')[1]) <= 6:
                        i.setSEAR_DEEP(False)
                    else:
                        i.setSEAR_METH("FAST")

                    # Don"t seem to work since it picks the high res limit now.
                    # Get an error when it prunes all the solutions away and TF has no input.
                    # command += "PEAKS ROT SELECT SIGMA CUTOFF 4.0\n"
                    # command += "PEAKS TRA SELECT SIGMA CUTOFF 6.0\n"
                # Turn off pruning in 2.6.0
                i.setSEAR_PRUN(False)
                # Choose more top peaks to help with getting it correct.
                i.setPURG_ROTA_ENAB(True)
                i.setPURG_ROTA_NUMB(3)
                #command += "PURGE ROT ENABLE ON\nPURGE ROT NUMBER 3\n"
                i.setPURG_TRAN_ENAB(True)
                i.setPURG_TRAN_NUMB(1)
                #command += "PURGE TRA ENABLE ON\nPURGE TRA NUMBER 1\n"

                # Only keep the top after refinement.
                i.setPURG_RNP_ENAB(True)
                i.setPURG_RNP_NUMB(1)
                #command += "PURGE RNP ENABLE ON\nPURGE RNP NUMBER 1\n"
                i.setROOT(convert_unicode(name))
                # i.setMUTE(False)
                i.setMUTE(True)
                # Delete the setup results
                # del(r)
                # launch the run
                r = phaser.runMR_AUTO(i)
            else:
                raise e

        if r.Success():
            # print r
            pass
            #if r.foundSolutions():
            #print "Phaser has found MR solutions"
            #print "Top LLG = %f" % r.getTopLLG()
            #print "Top PDB file = %s" % r.getTopPdbFile()
            #else:
            #print "Phaser has not found any MR solutions"
        else:
            print "Job exit status FAILURE"
            print r.ErrorName(), "ERROR :", r.ErrorMessage()

        # Save log files for debugging
        phaser_log = r.logfile()
        with open('phaser.log', 'w') as log:
            log.write(r.logfile())
            log.close()

        if r.foundSolutions():
            rfz = None
            tfz = None
            tncs = False
            # Parse results
            for p in r.getTopSet().ANNOTATION.split():
                # print p
                # For v 2.8.3
                # RF*0\nTF*0\nLLG=30699\nTFZ==174.8\nPAK=0\nLLG=30699\nTFZ==174.8\n
                if p.count('RFZ'):
                    if p.count('=') in [1]:
                        rfz = float(p.split('=')[-1])
                if p.count('RF*0'):
                    rfz = "NC"
                if p.count('TFZ'):
                    if p.count('=') in [1]:
                        tfz = p.split('=')[-1]
                        if tfz == '*':
                            tfz = 'arbitrary'
                        else:
                            tfz = float(tfz)
                if p.count('TF*0'):
                    tfz = "NC"
            tncs_test = [
                1 for line in r.getTopSet().unparse().splitlines()
                if line.count("+TNCS")
            ]
            tncs = bool(len(tncs_test))
            mtz_file = os.path.join(work_dir, r.getTopMtzFile())
            phaser_result = {
                "ID": name,
                "solution": r.foundSolutions(),
                "pdb_file": os.path.join(work_dir, r.getTopPdbFile()),
                "mtz": mtz_file,
                "gain": float(r.getTopLLG()),
                "rfz": rfz,
                # "tfz": r.getTopTFZ(),
                "tfz": tfz,
                "clash": r.getTopSet().PAK,
                "dir": os.getcwd(),
                "spacegroup":
                r.getTopSet().getSpaceGroupName().replace(' ', ''),
                "tNCS": tncs,
                "nmol": r.getTopSet().NUM,
                "adf": None,
                "peak": None,
            }

            # Calculate 2Fo-Fc & Fo-Fc maps
            # foo.mtz begets foo_2mFo-DFc.ccp4 & foo_mFo-DFc.ccp4
            local_subprocess(command="phenix.mtz2map %s" % mtz_file,
                             logfile='map.log',
                             shell=True)

            # Map files should now exist
            map_2_1 = mtz_file.replace(".mtz", "_2mFo-DFc.ccp4")
            map_1_1 = mtz_file.replace(".mtz", "_mFo-DFc.ccp4")

            # Make sure the maps exist and then package them
            if os.path.exists(map_2_1):
                # Compress the map
                arch_prod_file, arch_prod_hash = archive.compress_file(map_2_1)
                # Remove the map that was compressed
                os.unlink(map_2_1)
                # Store information
                map_for_display = {
                    "path": arch_prod_file,
                    "hash": arch_prod_hash,
                    "description": "map_2_1"
                }
                phaser_result["map_2_1"] = map_for_display

            if os.path.exists(map_1_1):
                # Compress the map
                arch_prod_file, arch_prod_hash = archive.compress_file(map_1_1)
                # Remove the map that was compressed
                os.unlink(map_1_1)
                # Store information
                map_for_display = {
                    "path": arch_prod_file,
                    "hash": arch_prod_hash,
                    "description": "map_1_1"
                }
                phaser_result["map_1_1"] = map_for_display

            # If PDB exists, package that too
            if phaser_result.get("pdb_file", False):
                if os.path.exists(phaser_result.get("pdb_file")):
                    # Compress the file
                    arch_prod_file, arch_prod_hash = archive.compress_file(
                        phaser_result.get("pdb_file"))
                    # Remove the map that was compressed
                    # os.unlink(phaser_result.get("pdb"))
                    # Store information
                    pdb_for_display = {
                        "path":
                        arch_prod_file,
                        "hash":
                        arch_prod_hash,
                        "description":
                        os.path.basename(phaser_result.get("pdb_file"))
                    }
                    phaser_result["pdb"] = pdb_for_display

            # Calc ADF map
            if adf:
                if os.path.exists(phaser_result.get(
                        "pdb_file", False)) and os.path.exists(
                            phaser_result.get("mtz", False)):
                    adf_results = calc_ADF_map(data_file=data_file,
                                               mtz=phaser_result["mtz"],
                                               pdb=phaser_result["pdb_file"])
                    if adf_results.get("adf"):
                        phaser_result.update({
                            "adf":
                            os.path.join(work_dir, adf_results.get("adf"))
                        })
                    if adf_results.get("peak"):
                        phaser_result.update({
                            "peak":
                            os.path.join(work_dir, adf_results.get("peak"))
                        })
                    #phaser_result.update({"adf": adf_results.get("adf", None),
                    #                      "peak": adf_results.get("peak", None),})

            # print "1"
            # print name
            # New procedure for making tar of results
            # Create directory
            # Remove the run # from the name
            # new_name = name[:-2]  #
            new_name = phaser_result.get("ID")  #
            # print new_name
            os.mkdir(new_name)
            # # Go through and copy files to archive directory
            file_types = ("pdb_file", "mtz", "adf", "peak")
            for file_type in file_types:
                # print file_type
                target_file = phaser_result.get(file_type, False)
                # print target_file
                if target_file:
                    if os.path.exists(target_file):
                        # Copy the file to the directory to be archived
                        shutil.copy(target_file, new_name + "/.")
            # # Create the archive
            archive_result = archive.create_archive(new_name)
            archive_result["description"] = '%s_files' % new_name
            phaser_result["tar"] = archive_result

            # print "2"

        else:
            phaser_result = {
                "ID": name,
                "solution": False,
                "message": "No solution",
                "spacegroup": spacegroup
            }
        # Add the phaser log
        if phaser_log:
            phaser_result.update({"logs": {"phaser": phaser_log}})

        # print "3"

        if db_settings and tag:
            print "db_settings and tag"
            # Connect to Redis
            redis = connect_to_redis(db_settings)
            # Key should be deleted once received, but set the key to expire in 24 hours just in case.
            redis.setex(tag, 86400, json.dumps(phaser_result))
            # Do a little sleep to make sure results are in Redis for postprocess_phaser
            time.sleep(0.1)
        else:
            # print "Printing phaser_result"
            # Print the result so it can be seen thru the queue by reading stdout
            # print phaser_result
            print json.dumps(phaser_result)
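A minimal call sketch for run_phaser as defined above, using only its documented parameters; the paths and the space group are placeholders. Without db_settings and tag, the phaser_result dict is printed to stdout as JSON:

# Hypothetical invocation; paths and the space group are placeholders.
run_phaser(data_file="/tmp/data.mtz",
           struct_file="/tmp/search_model.pdb",
           spacegroup="P212121",
           work_dir="/tmp/mr_p212121",
           ncopy=1,
           resolution=6.0)
# With db_settings and tag set, the same result would instead be stored in
# Redis under the given tag with a 24-hour expiry.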
Example #16
0
def run_phaser_module_OLD(datafile, inp=False):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.
    """
    # if self.verbose:
    #  self.logger.debug('Utilities::runPhaserModule')

    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        res0 = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        if f[-3:] in ('cif'):
            i0.addENSE_CIT_ID('model', convert_unicode(f), 0.7)
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(f), 0.7)
        # i.addSEAR_ENSE_NUM("junk",5)
        r1 = phaser.runMR_ELLG(i0)
        #print r1.logfile()
        if r1.Success():
            res0 = r1.get_target_resolution('model')
        del(r1)
        return res0

    def run_cca():
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res0)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        if r1.Success():
            z0 = r1.getBestZ()
            sc0 = 1-(1.23/r1.getBestVM())
        del(r1)
        return (z0, sc0)

    def run_ncs():
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print r1.logfile()
        print r1.loggraph().size()
        print r1.loggraph().__dict__.keys()
        #print r1.getCentricE4()
        if r1.Success():
            return(r1)

    def run_ano():
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print r1.loggraph().__dict__.keys()
        print r1.loggraph().size()
        print r1.logfile()
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """

        if r1.Success():
            print 'SUCCESS'
            return(r1)

    # Setup which modules are run
    matthews = False
    if inp:
        ellg = True
        ncs = False
        if type(inp) == str:
            f = inp
        else:
            np, na, res0, f = inp
            matthews = True
    else:
        ellg = False
        ncs = True

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(datafile))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if matthews:
            z, solvent_content = run_cca()
        if ncs:
            n = run_ncs()
    if matthews:
        # Assumes ellg is run as well.
        # return (z,sc,res)
        return {"z": z,
                "solvent_content": solvent_content,
                "target_resolution": target_resolution}
    elif ellg:
        # ellg run by itself
        # return target_resolution
        return {"target_resolution": target_resolution}
    else:
        # NCS
        return n
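run_phaser_module_OLD overloads its inp argument: a bare string runs only the eLLG resolution estimate, while an (np, na, res, file) tuple also runs the Matthews/CCA calculation. A hedged sketch of the two call forms; paths and counts are placeholders:

# Hypothetical calls; the MTZ and model paths are placeholders.
ellg_only = run_phaser_module_OLD("/tmp/data.mtz", inp="/tmp/model.pdb")
# -> {"target_resolution": ...}

with_cca = run_phaser_module_OLD("/tmp/data.mtz",
                                 inp=(250, 0, 2.0, "/tmp/model.pdb"))
# -> {"z": ..., "solvent_content": ..., "target_resolution": ...}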
Example #17
0
def run_phaser_module(data_file,
                      result_queue=False,
                      cca=False,
                      tncs=False,
                      ellg=False,
                      struct_file=False,
                      dres=False,
                      np=0,
                      na=0):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.
    data_file - input dataset mtz file
    result_queue - pass results to queue
    cca - Run CCA to determine the number of molecules in the AU and the solvent content (Matthews coefficient calculation)
    tncs - Run Anisotropy and tNCS correction on CID plots
    ellg - Run analysis to determine the optimum resolution for Phaser MR.
    struct_file - input search model file. Can be a PDB or mmCIF file
    dres - resolution of dataset (ELLG, CCA)
    np - default number of protein residues (CCA)
    na - default number of nucleic acid residues (CCA)
    """

    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        new_res = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        #  Read in CIF file
        if struct_file[-3:] in ('cif', ):
            i0.addENSE_CIT_ID("model", convert_unicode(struct_file), 0.7)
        # Read in PDB file
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(struct_file), 0.7)
        try:
            r1 = phaser.runMR_ELLG(i0)
        except RuntimeError as e:
            # print "Hit error"
            # Known CIF error - convert to pdb and retry
            if struct_file[-3:] in ('cif', ):
                # print "Convert to pdb"
                pdb.cif_as_pdb((struct_file, ))
                pdb_file = struct_file.replace(".cif", ".pdb")
                i1 = phaser.InputMR_ELLG()
                i1.setSPAC_HALL(r.getSpaceGroupHall())
                i1.setCELL6(r.getUnitCell())
                i1.setMUTE(True)
                i1.setREFL_DATA(r.getDATA())
                i1.addENSE_PDB_ID("model", convert_unicode(pdb_file), 0.7)
                r1 = phaser.runMR_ELLG(i1)
            else:
                raise e

        # print r1.logfile()
        if r1.Success():
            # If it worked use the recommended resolution
            new_res = round(r1.get_target_resolution('model'), 1)
        del (r1)
        return new_res

    def run_cca(res):
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        #print dir(r1)
        if r1.Success():
            z0 = r1.getBestZ()
            try:
                sc0 = round(1 - (1.23 / r1.getBestVM()), 2)
            except ZeroDivisionError:
                sc0 = 0
        del (r1)
        return (z0, sc0)

    def run_tncs():
        # CAN'T GET READABLE loggraph?!?
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print dir(r1)
        print r1.logfile()
        # for l in r1.loggraph():
        #    print l
        print r1.loggraph().size()
        print r1.output_strings
        #print r1.hasTNCS()
        #print r1.summary()
        print r1.warnings()
        print r1.ErrorMessage()
        #print r1.getCentricE4()
        if r1.Success():
            return (r1.loggraph())

    def run_ano():
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print r1.loggraph().__dict__.keys()
        print r1.loggraph().size()
        print r1.logfile()
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """

        if r1.Success():
            print 'SUCCESS'
            return (r1)

    # MAIN
    # Setup which modules are run
    # Read input MTZ file
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if cca:
            # Assumes ellg is run as well.
            z, solvent_content = run_cca(target_resolution)
        if tncs:
            n = run_tncs()
    if cca:
        out = {
            "z": z,
            "solvent_content": solvent_content,
            "target_resolution": target_resolution
        }
        if result_queue:
            result_queue.put(out)
        else:
            return out
    elif ellg:
        # ellg run by itself
        out = {"target_resolution": target_resolution}
        if result_queue:
            result_queue.put(out)
        else:
            return out
    else:
        # tNCS
        out = n
        if result_queue:
            result_queue.put(out)
        else:
            return out
    """
Example #18
0
def run_phaser_module(data_file,
                      result_queue=False,
                      cca=False,
                      tncs=False,
                      ellg=False,
                      mmcif=False,
                      dres=False,
                      np=0,
                      na=0,):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.
    data_file - input dataset mtz file
    result_queue - pass results to queue
    cca - Run CCA to determine the number of molecules in the AU and the solvent content (Matthews coefficient calculation)
    tncs - Run Anisotropy and tNCS correction on CID plots
    ellg - Run analysis to determine the optimum resolution for Phaser MR.
    mmcif - input mmCIF search model file. Can also be a PDB file
    dres - resolution of dataset (ELLG, CCA)
    np - default number of protein residues (CCA)
    na - default number of nucleic acid residues (CCA)
    """

    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        new_res = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        if mmcif[-3:] in ('cif'):
            i0.addENSE_CIT_ID('model', convert_unicode(mmcif), 0.7)
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(mmcif), 0.7)
        r1 = phaser.runMR_ELLG(i0)
        #print r1.logfile()
        if r1.Success():
            # If it worked use the recommended resolution
            new_res = round(r1.get_target_resolution('model'), 1)
        del(r1)
        return new_res

    def run_cca(res):
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        #print dir(r1)
        if r1.Success():
            z0 = r1.getBestZ()
            sc0 = round(1-(1.23/r1.getBestVM()), 2)
        del(r1)
        return (z0, sc0)

    def run_tncs():
        # CAN'T GET READABLE loggraph?!?
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print dir(r1)
        print r1.logfile()
        # for l in r1.loggraph():
        #    print l
        print r1.loggraph().size()
        print r1.output_strings
        #print r1.hasTNCS()
        #print r1.summary()
        print r1.warnings()
        print r1.ErrorMessage()
        #print r1.getCentricE4()
        if r1.Success():
            return(r1.loggraph())

    def run_ano():
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print r1.loggraph().__dict__.keys()
        print r1.loggraph().size()
        print r1.logfile()
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """

        if r1.Success():
            print 'SUCCESS'
            return(r1)

    # MAIN
    # Setup which modules are run
    # Read input MTZ file
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if cca:
            # Assumes ellg is run as well.
            z, solvent_content = run_cca(target_resolution)
        if tncs:
            n = run_tncs()
    if cca:
        out = {"z": z,
               "solvent_content": solvent_content,
               "target_resolution": target_resolution}
        if result_queue:
            result_queue.put(out)
        else:
            return out
    elif ellg:
        # ellg run by itself
        out = {"target_resolution": target_resolution}
        if result_queue:
            result_queue.put(out)
        else:
            return out
    else:
        # tNCS
        out = n
        if result_queue:
            result_queue.put(out)
        else:
            return out

    """
Example #19
0
def run_phaser(datafile,
               spacegroup,
               output_id,
               db_settings,
               work_dir=False,
               cif=False,
               pdb=False,
               name=False,
               ncopy=1,
               cell_analysis=False,
               resolution=False,
               large_cell=False,
               run_before=False,
               ):
    """
    Run Phaser and pass results back to the RAPD Redis DB
    **Requires Phaser src code!**

    datafile - input data as mtz
    spacegroup - The space group to run MR
    output_id - a Redis key where the results are sent
    db_settings - Redis connection settings for sending results
    work_dir - working directory
    cif - input search model path in mmCIF format (do not use with 'pdb')
    pdb -  input search model path in PDB format (do not use with 'cif')
    name - root name for output files
    ncopy - number of molecules to search for
    cell_analysis - internal RAPD signal so all possible SG's are searched
    resolution - high res limit to run MR (float)
    large_cell - optimizes parameters to speed up MR with large unit cell.
    run_before - signal to run more comprehensive MR
    """
    # Change to work_dir
    if not work_dir:
        work_dir = os.getcwd()
    os.chdir(work_dir)

    if not name:
        name = spacegroup

    # Connect to Redis
    redis = connect_to_redis(db_settings)

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(datafile))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        i = phaser.InputMR_AUTO()
        # i.setREFL_DATA(r.getREFL_DATA())
        # i.setREFL_DATA(r.DATA_REFL())
        i.setREFL_F_SIGF(r.getMiller(), r.getFobs(), r.getSigFobs())
        i.setCELL6(r.getUnitCell())
        if cif:
            #i.addENSE_CIF_ID('model', cif, 0.7)
            ### Typo in PHASER CODE!!!###
            i.addENSE_CIT_ID('model', convert_unicode(cif), 0.7)
        if pdb:
            i.addENSE_PDB_ID('model', convert_unicode(pdb), 0.7)
        i.addSEAR_ENSE_NUM("model", ncopy)
        i.setSPAC_NAME(spacegroup)
        if cell_analysis:
            i.setSGAL_SELE("ALL")
            # Set it for worst case in orth
            # number of processes to run in parallel where possible
            i.setJOBS(1)
        else:
            i.setSGAL_SELE("NONE")
        if run_before:
            # Picks own resolution
            # Round 2: pick the best solution as long as there are less than 10% clashes
            i.setPACK_SELE("PERCENT")
            i.setPACK_CUTO(0.1)
            #command += "PACK CUTOFF 10\n"
        else:
            # For first round and cell analysis
            # Only set the resolution limit in the first round or cell analysis.
            if resolution:
                i.setRESO_HIGH(resolution)
            else:
                # Otherwise it runs a second MR at full resolution!!
                # I don't think a second round is run anymore.
                # command += "RESOLUTION SEARCH HIGH OFF\n"
                if large_cell:
                    i.setRESO_HIGH(6.0)
                else:
                    i.setRESO_HIGH(4.5)
            i.setSEAR_DEEP(False)
            # Don"t seem to work since it picks the high res limit now.
            # Get an error when it prunes all the solutions away and TF has no input.
            # command += "PEAKS ROT SELECT SIGMA CUTOFF 4.0\n"
            # command += "PEAKS TRA SELECT SIGMA CUTOFF 6.0\n"
        # Turn off pruning in 2.6.0
        i.setSEAR_PRUN(False)
        # Choose more top peaks to help with getting it correct.
        i.setPURG_ROTA_ENAB(True)
        i.setPURG_ROTA_NUMB(3)
        #command += "PURGE ROT ENABLE ON\nPURGE ROT NUMBER 3\n"
        i.setPURG_TRAN_ENAB(True)
        i.setPURG_TRAN_NUMB(1)
        #command += "PURGE TRA ENABLE ON\nPURGE TRA NUMBER 1\n"

        # Only keep the top after refinement.
        i.setPURG_RNP_ENAB(True)
        i.setPURG_RNP_NUMB(1)
        #command += "PURGE RNP ENABLE ON\nPURGE RNP NUMBER 1\n"
        i.setROOT(convert_unicode(name))
        # i.setMUTE(False)
        i.setMUTE(True)
        # Delete the setup results
        del(r)
        # launch the run
        r = phaser.runMR_AUTO(i)
        if r.Success():
            if r.foundSolutions():
                print "Phaser has found MR solutions"
                #print "Top LLG = %f" % r.getTopLLG()
                #print "Top PDB file = %s" % r.getTopPdbFile()
            else:
                print "Phaser has not found any MR solutions"
        else:
            print "Job exit status FAILURE"
            print r.ErrorName(), "ERROR :", r.ErrorMessage()

        with open('phaser.log', 'w') as log:
            log.write(r.logfile())
            log.close()
        with open('phaser_sum.log', 'w') as log:
            log.write(r.summary())
            log.close()

    if r.foundSolutions():
        rfz = None
        tfz = None
        tncs = False
        # Parse results
        for p in r.getTopSet().ANNOTATION.split():
            if p.count('RFZ'):
                if p.count('=') in [1]:
                    rfz = float(p.split('=')[-1])
            if p.count('RF*0'):
                rfz = "NC"
            if p.count('TFZ'):
                if p.count('=') in [1]:
                    tfz = p.split('=')[-1]
                    if tfz == '*':
                        tfz = 'arbitrary'
                    else:
                        tfz = float(tfz)
            if p.count('TF*0'):
                tfz = "NC"
        tncs_test = [1 for line in r.getTopSet().unparse().splitlines()
                     if line.count("+TNCS")]
        tncs = bool(len(tncs_test))
        phaser_result = {"ID": name,
                         "solution": r.foundSolutions(),
                         "pdb": r.getTopPdbFile(),
                         "mtz": r.getTopMtzFile(),
                         "gain": float(r.getTopLLG()),
                         "rfz": rfz,
                         # "tfz": r.getTopTFZ(),
                         "tfz": tfz,
                         "clash": r.getTopSet().PAK,
                         "dir": os.getcwd(),
                         "spacegroup": r.getTopSet().getSpaceGroupName().replace(' ', ''),
                         "tNCS": tncs,
                         "nmol": r.getTopSet().NUM,
                         "adf": None,
                         "peak": None,
                         }
        
        # make tar.bz2 of result files
        # l = ['pdb', 'mtz', 'adf', 'peak']
        # archive = "%s.tar.bz2" % name
        # with tarfile.open(archive, "w:bz2") as tar:
        #     for f in l:
        #         fo = phaser_result.get(f, False)
        #         if fo:
        #             if os.path.exists(fo):
        #                 tar.add(fo)
        #     tar.close()
        # phaser_result['tar'] = os.path.join(work_dir, archive)
        
        # New procedure for making tar of results
        # Create directory
        os.mkdir(name)
        # Go through and copy files to archive directory
        file_types = ("pdb", "mtz", "adf", "peak")
        for file_type in file_types:
            target_file = phaser_result.get(file_type, False)
            if target_file:
                if os.path.exists(target_file):
                    # Copy the file to the directory to be archived
                    shutil.copy(target_file, name+"/.")
        # Create the archive
        archive_result = archive.create_archive(name)
        archive_result["description"] = name
        phaser_result["tar"] = archive_result
        
        phaser_result["pdb_file"] = os.path.join(work_dir, r.getTopPdbFile())
    else:
        phaser_result = {"ID": name,
                         "solution": False,
                         "message": "No solution"}

    # Print the result so it can be seen in the rapd._phaser.log if needed
    print phaser_result

    # Key should be deleted once received, but set the key to expire in 24 hours just in case.
    redis.setex(output_id, 86400, json.dumps(phaser_result))
    # Do a little sleep to make sure results are in Redis for postprocess_phaser
    time.sleep(0.1)
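run_phaser above stores its result in Redis under output_id with a 24-hour expiry. A hedged sketch of how a consumer might read it back with redis-py; the connection parameters and key are placeholders:

import json
import redis  # redis-py; connection parameters below are placeholders

client = redis.Redis(host="localhost", port=6379, db=0)
raw = client.get("Phaser_1234")  # the output_id used when launching run_phaser
phaser_result = json.loads(raw) if raw else None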
Example #20
0
    def __init__(self,
                 site,
                 command,
                 processed_results=False,
                 tprint=False,
                 logger=False,
                 verbosity=False):
        """Initialize the plugin"""
        Thread.__init__(self)

        # If the logging instance is passed in...
        if logger:
            self.logger = logger
        else:
            # Otherwise get the logger Instance
            self.logger = logging.getLogger("RAPDLogger")
            self.logger.debug("__init__")

        # Keep track of start time
        self.start_time = time.time()
        # Store tprint for use throughout
        if tprint:
            self.tprint = tprint
        # Dead end if no tprint passed
        else:

            def func(arg=False, level=False, verbosity=False, color=False):
                """Dummy function"""
                pass

            self.tprint = func

        # Used for sending results back to DB referencing a dataset
        self.processed_results = processed_results

        # Some logging
        self.logger.info(command)

        self.verbose = verbosity

        # Store passed-in variables
        self.site = site
        self.command = command
        self.preferences = self.command.get("preferences", {})

        # Params
        self.working_dir = self.command["directories"].get("work", os.getcwd())

        self.test = self.preferences.get("test", False)
        #self.test = self.preferences.get("test", True) # Limit number of runs on cluster

        #self.sample_type = self.preferences.get("type", "protein")
        #self.solvent_content = self.preferences.get("solvent_content", 0.55)
        self.clean = self.preferences.get("clean", True)
        # self.verbose = self.command["preferences"].get("verbose", False)
        self.data_file = xutils.convert_unicode(
            self.command["input_data"].get("data_file"))
        # Used for setting up Redis connection
        self.db_settings = self.command["input_data"].get("db_settings")
        #self.nproc = self.preferences.get("nproc", 1)

        # If no launcher is passed in, use local_subprocess in a multiprocessing.Pool
        self.computer_cluster = xutils.load_cluster_adapter(self)
        if self.computer_cluster:
            self.launcher = self.computer_cluster.process_cluster
            self.batch_queue = self.computer_cluster.check_queue(
                self.command.get('command'))
        else:
            self.launcher = local_subprocess
            self.pool = mp_pool(self.preferences.get("nproc", cpu_count() - 1))
            self.manager = mp_manager()

        # Setup a multiprocessing pool if not using a computer cluster.
        #if not self.computer_cluster:
        #    self.pool = mp_pool(self.nproc)

        # Set Python path for subcontractors.rapd_phaser
        self.rapd_python = "rapd.python"
        if self.site:
            if hasattr(self.site, "RAPD_PYTHON_PATH"):
                self.rapd_python = self.site.RAPD_PYTHON_PATH
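This plugin __init__ reads a slightly different command layout than the PDBQuery examples (data_file and db_settings under input_data). A hedged sketch of the expected shape, inferred only from the keys accessed above; values are placeholders:

# Keys inferred from the lookups above; values are placeholders.
command = {
    "command": "MR",  # used by the cluster adapter to pick a batch queue
    "directories": {"work": "/tmp/mr"},
    "preferences": {"test": False, "clean": True, "nproc": 4},
    "input_data": {
        "data_file": "/tmp/data.mtz",
        "db_settings": {},  # Redis connection settings; format not shown here
    },
}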
Example #21
0
def run_phaser_module_OLD(data_file, inp=False):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.
    """
    # if self.verbose:
    #  self.logger.debug('Utilities::runPhaserModule')

    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        res0 = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        if f[-3:].lower() == 'cif':
            i0.addENSE_CIT_ID('model', convert_unicode(f), 0.7)
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(f), 0.7)
        # i.addSEAR_ENSE_NUM("junk",5)
        r1 = phaser.runMR_ELLG(i0)
        #print r1.logfile()
        if r1.Success():
            res0 = r1.get_target_resolution('model')
        del r1
        return res0

    def run_cca():
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res0)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        if r1.Success():
            z0 = r1.getBestZ()
            sc0 = 1 - (1.23 / r1.getBestVM())
        del r1
        return (z0, sc0)

    def run_ncs():
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print(r1.logfile())
        print(r1.loggraph().size())
        print(r1.loggraph().__dict__.keys())
        #print r1.getCentricE4()
        if r1.Success():
            return (r1)

    def run_ano():
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print(r1.loggraph().__dict__.keys())
        print(r1.loggraph().size())
        print(r1.logfile())
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """

        if r1.Success():
            print('SUCCESS')
            return (r1)

    # Setup which modules are run
    matthews = False
    if inp:
        ellg = True
        ncs = False
        if isinstance(inp, str):
            f = inp
        else:
            np, na, res0, f = inp
            matthews = True
    else:
        ellg = False
        ncs = True

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if matthews:
            z, solvent_content = run_cca()
        if ncs:
            n = run_ncs()
    if matthews:
        # Assumes ellg is run as well.
        # return (z,sc,res)
        return {
            "z": z,
            "solvent_content": solvent_content,
            "target_resolution": target_resolution
        }
    elif ellg:
        # ellg run by itself
        # return target_resolution
        return {"target_resolution": target_resolution}
    else:
        # NCS
        return n
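A brief usage sketch of run_phaser_module_OLD, inferred from its two calling modes: passing a model file path as inp runs only the eLLG step and returns the target resolution, while passing a (n_protein_res, n_nucleic_res, high_res_limit, model_path) tuple also runs the Matthews/CCA estimate. The file paths and residue counts below are placeholders, and the calls assume phaser and the function above are importable.

# Hypothetical inputs; assumes phaser and run_phaser_module_OLD are importable.

# eLLG only: inp is a search-model file path
ellg_only = run_phaser_module_OLD("/data/project/free.mtz",
                                  inp="/data/project/model.pdb")
print(ellg_only["target_resolution"])

# eLLG + Matthews/CCA: inp is (n_protein_res, n_nucleic_res, high_res_limit, model_path)
full = run_phaser_module_OLD("/data/project/free.mtz",
                             inp=(250, 0, 2.0, "/data/project/model.pdb"))
print(full["z"], full["solvent_content"], full["target_resolution"])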
Example #22
0
    def __init__(self,
                 command,
                 site=False,
                 processed_results=False,
                 tprint=False,
                 logger=False,
                 verbosity=False):
        """Initialize the plugin"""
        Thread.__init__(self)

        # If the logging instance is passed in...
        if logger:
            self.logger = logger
        else:
            # Otherwise get the logger instance
            self.logger = logging.getLogger("RAPDLogger")
            self.logger.debug("__init__")

        # Keep track of start time
        self.start_time = time.time()
        # Store tprint for use throughout
        if tprint:
            self.tprint = tprint
        # Dead end if no tprint passed
        else:

            def func(arg=False, level=False, verbosity=False, color=False):
                """Dummy function"""
                pass

            self.tprint = func

        # Used for sending results back to DB referencing a dataset
        self.processed_results = processed_results

        # Some logging
        self.logger.info(command)

        self.verbose = verbosity

        # Store passed-in variables
        self.site = site
        self.command = command
        self.preferences = self.command.get("preferences", {})

        # Params
        self.working_dir = self.command["directories"].get("work", os.getcwd())

        #self.test = self.preferences.get("test", False)
        self.test = self.preferences.get(
            "test", True)  # Limit number of runs on cluster

        #self.sample_type = self.preferences.get("type", "protein")
        #self.solvent_content = self.preferences.get("solvent_content", 0.55)
        # Number of molecules specified
        #self.nmol = self.preferences.get('nmol', False)
        # Input data MTZ file
        self.data_file = xutils.convert_unicode(
            self.command["input_data"].get("data_file"))
        # Input PDB/mmCIF file or PDB code.
        self.struct_file = xutils.convert_unicode(
            self.command["input_data"].get("struct_file"))

        # Save preferences
        self.clean = self.preferences.get("clean", True)
        # Calc ADF for each solution (creates a lot of big map files).
        self.adf = self.preferences.get("adf", False)

        # Check if there is a computer cluster and load adapter.
        self.computer_cluster = xutils.load_cluster_adapter(self)

        if self.computer_cluster:
            self.launcher = self.computer_cluster.process_cluster
            self.batch_queue = self.computer_cluster.check_queue(
                self.command.get('command'))
        else:
            # If NOT using a computer cluster, set up a multiprocessing Pool and Manager for queues.
            self.launcher = local_subprocess
            self.pool = mp_pool(self.preferences.get("nproc", cpu_count() - 1))
            self.manager = mp_manager()

        # Set Python path for subcontractors.rapd_phaser
        self.rapd_python = "rapd.python"
        if self.site and hasattr(self.site, "RAPD_PYTHON_PATH"):
            self.rapd_python = self.site.RAPD_PYTHON_PATH
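When xutils.load_cluster_adapter() returns nothing, the plugins above fall back to a multiprocessing.Pool plus a Manager for result queues. Below is a minimal, self-contained sketch of that fallback pattern; the job function and its arguments are hypothetical stand-ins for local_subprocess and the real per-job keyword arguments.

from multiprocessing import Manager, Pool, cpu_count

def job(args):
    """Hypothetical stand-in for local_subprocess: do one unit of work, report back."""
    args["result_queue"].put({"tag": args["tag"], "status": "complete"})

if __name__ == "__main__":
    manager = Manager()
    result_queue = manager.Queue()          # proxy queue; safe to hand to pool workers
    pool = Pool(processes=max(cpu_count() - 1, 1))

    # Submit jobs asynchronously, mirroring the pool/manager setup in the constructor above
    for tag in ("chain_A", "chain_B"):
        pool.apply_async(job, (dict(tag=tag, result_queue=result_queue),))
    pool.close()
    pool.join()

    while not result_queue.empty():
        print(result_queue.get())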
Example #23
0
def get_pdb_info(struct_file, data_file, dres, matthews=True, chains=True):
    """Get info from PDB or mmCIF file"""

    # Get rid of ligands and water so Phenix won't error.
    np = 0
    na = 0
    nmol = 1
    sc = 0.55
    nchains = 0
    res1 = 0.0
    d = {}
    l = []

    # Read in the file
    struct_file = convert_unicode(struct_file)
    if struct_file[-3:].lower() == 'cif':
        root = iotbx_mmcif.cif_input(
            file_name=struct_file).construct_hierarchy()
    else:
        root = iotbx_pdb.input(struct_file).construct_hierarchy()

    # Go through the chains
    for chain in root.models()[0].chains():
        # Number of protein residues
        np1 = 0
        # Number of nucleic acid residues
        na1 = 0

        # Sometimes HETATMs are amino acids with the same chain id.
        if l.count(chain.id) == 0:
            l.append(chain.id)
            repeat = False
            nchains += 1
        else:
            repeat = True

        # Count the number of AA and NA in pdb file.
        for rg in chain.residue_groups():
            if rg.atoms()[0].parent(
            ).resname in iotbx_pdb.common_residue_names_amino_acid:
                np1 += 1
            if rg.atoms()[0].parent(
            ).resname in iotbx_pdb.common_residue_names_rna_dna:
                na1 += 1
            # Not sure if I get duplicates?
            if rg.atoms()[0].parent().resname in \
               iotbx_pdb.common_residue_names_ccp4_mon_lib_rna_dna:
                na1 += 1
        # Limit to 10 chains?!?
        if nchains < 10:
            # Do not split up PDB if run from cell analysis
            if chains and not repeat:

                # Save info for each chain.
                if np1 or na1:

                    # Write new pdb files for each chain.
                    temp = iotbx_pdb.hierarchy.new_hierarchy_from_chain(chain)

                    # Long way of making sure that the user does not have a directory named '.pdb'
                    # or '.cif'
                    #n = os.path.join(os.path.dirname(struct_file), "%s_%s.pdb" % \
                    n = os.path.join(os.path.dirname(struct_file), "%s_%s.cif" % \
                        (os.path.basename(struct_file)[:os.path.basename(struct_file).find('.')], \
                        chain.id))
                    #temp.write_pdb_file(file_name=n)
                    # Write chain as mmCIF file.
                    temp.write_mmcif_file(file_name=n)

                    d[chain.id] = {
                        'file': n,
                        'NRes': np1 + na1,
                        'MWna': na1 * 330,
                        'MWaa': np1 * 110,
                        'MW': na1 * 330 + np1 * 110
                    }
                    if matthews:
                        # Run Matthews Calc. on chain
                        #phaser_return = run_phaser_module((np1, na1, dres, n, data_file))
                        #phaser_return = run_phaser_module(data_file, (np1, na1, dres, n))
                        phaser_return = run_phaser_module(data_file=data_file,
                                                          ellg=True,
                                                          cca=True,
                                                          struct_file=n,
                                                          dres=dres,
                                                          np=np1,
                                                          na=na1)
                        d[chain.id].update({
                            'NMol':
                            phaser_return.get("z", nmol),
                            'SC':
                            phaser_return.get("solvent_content", sc),
                            'res':
                            phaser_return.get("target_resolution", res1)
                        })
                    else:
                        #res1 = run_phaser_module(n)
                        phaser_return = run_phaser_module(data_file=data_file,
                                                          ellg=True,
                                                          struct_file=n)
                        d[chain.id].update({
                            'res':
                            phaser_return.get("target_resolution", res1)
                        })
                    """
                    d[chain.id] = {'file': n,
                                   'NRes': np1+na1,
                                   'MWna': na1*330,
                                   'MWaa': np1*110,
                                   'MW': na1*330+np1*110,
                                   'NMol': phaser_return.get("z", nmol),
                                   'SC': phaser_return.get("solvent_content", sc),
                                   'res': phaser_return.get("target_resolution", res1)}
                    """
        # Add up residue count
        np += np1
        na += na1

    d['all'] = {
        'file': struct_file,
        'NRes': np + na,
        'MWna': na * 330,
        'MWaa': np * 110,
        'MW': na * 330 + np * 110
    }
    # Run on entire PDB
    if matthews:
        #phaser_return = run_phaser_module((np, na, dres, struct_file, data_file))
        #phaser_return = run_phaser_module(data_file, (np, na, dres, struct_file))
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          cca=True,
                                          struct_file=struct_file,
                                          dres=dres,
                                          np=np,
                                          na=na)
        d['all'].update({
            'NMol': phaser_return.get("z", nmol),
            'SC': phaser_return.get("solvent_content", sc),
            'res': phaser_return.get("target_resolution", res1)
        })
    else:
        #phaser_return = run_phaser_module((np, na, dres, struct_file, data_file))
        #phaser_return = run_phaser_module(data_file, (np, na, dres, struct_file))
        # phaser_return = run_phaser_module(data_file=data_file,
        #                                   ellg=True,
        #                                   struct_file=struct_file)
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          struct_file=struct_file)
        d['all'].update({'res': phaser_return.get("target_resolution", res1)})
    """
    d['all'] = {'file': struct_file,
                'NRes': np+na,
                'MWna': na*330,
                'MWaa': np*110,
                'MW': na*330+np*110,
                'NMol': phaser_return.get("z", nmol),
                'SC': phaser_return.get("solvent_content", sc),
                'res': phaser_return.get("target_resolution", res1)}
    """
    return d
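A hedged usage sketch of get_pdb_info: it returns a dict keyed by chain id plus an 'all' entry, each carrying the residue count, rough molecular-weight estimates (110 Da per amino-acid residue, 330 Da per nucleotide), and, when matthews=True, the Phaser-derived copy number, solvent content, and target resolution. The file paths and resolution limit below are placeholders, and the call assumes the helpers above (run_phaser_module, convert_unicode, the iotbx modules) are importable.

# Hypothetical inputs; assumes the helpers above are importable.
info = get_pdb_info(struct_file="/data/project/model.cif",
                    data_file="/data/project/free.mtz",
                    dres=2.0,        # high-resolution limit of the dataset
                    matthews=True,
                    chains=True)

# Whole-structure summary
print(info["all"]["NRes"], info["all"]["MW"], info["all"].get("NMol"))

# Per-chain entries (every key except 'all')
for chain_id, stats in info.items():
    if chain_id != "all":
        print(chain_id, stats["file"], stats.get("SC"), stats.get("res"))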
Example #24
0
def get_pdb_info(cif_file, data_file, dres, matthews=True, chains=True):
    """Get info from PDB of mmCIF file"""

    # Get rid of ligands and water so Phenix won't error.
    np = 0
    na = 0
    nmol = 1
    sc = 0.55
    nchains = 0
    res1 = 0.0
    d = {}
    l = []

    # Read in the file
    cif_file = convert_unicode(cif_file)
    if cif_file[-3:].lower() == 'cif':
        root = iotbx_mmcif.cif_input(file_name=cif_file).construct_hierarchy()
    else:
        root = iotbx_pdb.input(cif_file).construct_hierarchy()

    # Go through the chains
    for chain in root.models()[0].chains():
        # Number of protein residues
        np1 = 0
        # Number of nucleic acid residues
        na1 = 0

        # Sometimes HETATMs are amino acids with the same chain id.
        if l.count(chain.id) == 0:
            l.append(chain.id)
            repeat = False
            nchains += 1
        else:
            repeat = True

        # Count the number of AA and NA in pdb file.
        for rg in chain.residue_groups():
            if rg.atoms()[0].parent().resname in iotbx_pdb.common_residue_names_amino_acid:
                np1 += 1
            if rg.atoms()[0].parent().resname in iotbx_pdb.common_residue_names_rna_dna:
                na1 += 1
            # Not sure if I get duplicates?
            if rg.atoms()[0].parent().resname in \
               iotbx_pdb.common_residue_names_ccp4_mon_lib_rna_dna:
                na1 += 1
        # Limit to 10 chains?!?
        if nchains < 10:
            # Do not split up PDB if run from cell analysis
            if chains and not repeat:

                # Save info for each chain.
                if np1 or na1:

                    # Write new pdb files for each chain.
                    temp = iotbx_pdb.hierarchy.new_hierarchy_from_chain(chain)

                    # Long way of making sure that the user does not have a directory named '.pdb'
                    # or '.cif'
                    #n = os.path.join(os.path.dirname(cif_file), "%s_%s.pdb" % \
                    n = os.path.join(os.path.dirname(cif_file), "%s_%s.cif" % \
                        (os.path.basename(cif_file)[:os.path.basename(cif_file).find('.')], \
                        chain.id))
                    #temp.write_pdb_file(file_name=n)
                    temp.write_mmcif_file(file_name=n)
                    d[chain.id] = {'file': n,
                                   'NRes': np1+na1,
                                   'MWna': na1*330,
                                   'MWaa': np1*110,
                                   'MW': na1*330+np1*110}
                    if matthews:
                        # Run Matthews Calc. on chain
                        #phaser_return = run_phaser_module((np1, na1, dres, n, data_file))
                        #phaser_return = run_phaser_module(data_file, (np1, na1, dres, n))
                        phaser_return = run_phaser_module(data_file=data_file,
                                                          ellg=True,
                                                          cca=True,
                                                          mmcif=n,
                                                          dres=dres,
                                                          np=np1,
                                                          na=na1)
                        d[chain.id].update({'NMol': phaser_return.get("z", nmol),
                                            'SC': phaser_return.get("solvent_content", sc),
                                            'res': phaser_return.get("target_resolution", res1)})
                    else:
                        #res1 = run_phaser_module(n)
                        phaser_return = run_phaser_module(data_file=data_file,
                                                          ellg=True,
                                                          mmcif=n)
                        d[chain.id].update({'res': phaser_return.get("target_resolution", res1)})
                    """
                    d[chain.id] = {'file': n,
                                   'NRes': np1+na1,
                                   'MWna': na1*330,
                                   'MWaa': np1*110,
                                   'MW': na1*330+np1*110,
                                   'NMol': phaser_return.get("z", nmol),
                                   'SC': phaser_return.get("solvent_content", sc),
                                   'res': phaser_return.get("target_resolution", res1)}
                    """
        # Add up residue count
        np += np1
        na += na1

    d['all'] = {'file': cif_file,
                'NRes': np+na,
                'MWna': na*330,
                'MWaa': np*110,
                'MW': na*330+np*110}
    # Run on entire PDB
    if matthews:
        #phaser_return = run_phaser_module((np, na, dres, cif_file, data_file))
        #phaser_return = run_phaser_module(data_file, (np, na, dres, cif_file))
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          cca=True,
                                          mmcif=cif_file,
                                          dres=dres,
                                          np=np,
                                          na=na)
        d['all'].update({'NMol': phaser_return.get("z", nmol),
                         'SC': phaser_return.get("solvent_content", sc),
                         'res': phaser_return.get("target_resolution", res1)})
    else:
        #phaser_return = run_phaser_module((np, na, dres, cif_file, data_file))
        #phaser_return = run_phaser_module(data_file, (np, na, dres, cif_file))
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          mmcif=cif_file)
        d['all'].update({'res': phaser_return.get("target_resolution", res1)})
    """
    d['all'] = {'file': cif_file,
                'NRes': np+na,
                'MWna': na*330,
                'MWaa': np*110,
                'MW': na*330+np*110,
                'NMol': phaser_return.get("z", nmol),
                'SC': phaser_return.get("solvent_content", sc),
                'res': phaser_return.get("target_resolution", res1)}
    """
    return d
Example #25
0
    def __init__(self,
                 command,
                 processed_results=False,
                 computer_cluster=False,
                 tprint=False,
                 logger=False,
                 verbosity=False):
        """Initialize the plugin"""
        Thread.__init__(self)

        # If the logging instance is passed in...
        if logger:
            self.logger = logger
        else:
            # Otherwise get the logger instance
            self.logger = logging.getLogger("RAPDLogger")
            self.logger.debug("__init__")

        # Keep track of start time
        self.start_time = time.time()
        # Store tprint for use throughout
        if tprint:
            self.tprint = tprint
        # Dead end if no tprint passed
        else:
            def func(arg=False, level=False, verbosity=False, color=False):
                """Dummy function"""
                pass
            self.tprint = func
        
        # Used for sending results back to DB referencing a dataset
        self.processed_results = processed_results

        # Some logging
        self.logger.info(command)

        self.verbose = verbosity

        # Store passed-in variables
        self.command = command
        self.preferences = self.command.get("preferences", {})

        # Params
        self.working_dir = self.command["directories"].get("work", os.getcwd())
        self.test = self.preferences.get("test", False)
        self.sample_type = self.preferences.get("type", "protein")
        self.solvent_content = self.preferences.get("solvent_content", 0.55)
        self.clean = self.preferences.get("clean", True)
        # self.verbose = self.command["preferences"].get("verbose", False)
        self.datafile = xutils.convert_unicode(self.command["input_data"].get("datafile"))
        # Used for setting up Redis connection
        self.db_settings = self.command["input_data"].get("db_settings")
        self.nproc = self.preferences.get("nproc", 1)
        self.computer_cluster = self.preferences.get("computer_cluster", False)
        
        # If no launcher is passed in, use local_subprocess in a multiprocessing.Pool

        self.computer_cluster = computer_cluster
        if self.computer_cluster:
            self.launcher = self.computer_cluster.process_cluster
            self.batch_queue = self.computer_cluster.check_queue(self.command.get('command'))
        else:
            self.launcher = local_subprocess

        # Setup a multiprocessing pool if not using a computer cluster.
        if not self.computer_cluster:
            self.pool = mp_pool(self.nproc)