Ejemplo n.º 1
0
def download_pdb(pdb_id, output_dir):
    """Fetch a PDB entry from RCSB into output_dir unless it is already there.

    The file is named ``<PDB_ID in upper case>.pdb``. If a file with that
    name already exists in output_dir it is reused and nothing is downloaded.
    On an HTTP error, or when the file cannot be saved, the error is reported
    with put_error() and the process exits with status 1.

    Returns the path of the (existing or newly written) file.
    """
    filename = pdb_id.upper() + '.pdb'
    path = os.path.join(output_dir, filename)
    if os.path.exists(path):
        comment('%s: using existing file %s\n' % (pdb_id, filename))
        return path

    comment('Downloading %s from RCSB...  ' % pdb_id)
    url = ('http://www.rcsb.org/pdb/download/downloadFile.do'
           '?fileFormat=pdb&compression=NO&structureId=' + pdb_id.upper())
    try:
        u = urlopen(url)
    except HTTPError as e:
        put_error(str(e))
        sys.exit(1)
    # always release the connection, also when read() fails
    try:
        content = u.read()
    finally:
        u.close()
    try:
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        with open(path, 'wb') as f:
            f.write(content)
        comment('done.\n')
    # os.makedirs raises OSError, which is not an IOError on Python 2
    except (IOError, OSError) as e:
        put_error('Failed to save downloaded file on disk', comment=str(e))
        sys.exit(1)
    return path
Ejemplo n.º 2
0
def parse_workflow_commands():
    """Handle the 'info' and 'repeat' sub-commands given on the command line.

    Returns False when the first command-line argument is neither 'info'
    nor 'repeat' (nothing is done in that case), True otherwise.
    Exits with status 1 if a repeated job raises JobError.
    """
    prog = __package__ or os.path.basename(sys.argv[0])
    argv = sys.argv[1:]
    command = argv[0] if argv else None
    if command not in ('info', 'repeat'):
        return False
    if len(argv) == 1:
        sys.stderr.write("Specify output_dir.\n")
        return True

    # shortcut: "/my/path/05-cad.log" is interpreted as "/my/path" "05"
    log_path = argv[1]
    suffix = os.path.splitext(log_path)[1]
    if os.path.isfile(log_path) and suffix in ('.log', '.err'):
        directory, name = os.path.split(log_path)
        argv[1:2] = [directory, name.split('-')[0]]

    output_dir = argv[1]
    wf = open_pickled_workflow(output_dir)
    steps = argv[2:]
    if not steps:
        show_workflow_info(wf, dict(prog=prog, output_dir=output_dir))
        return True

    for job in parse_steps(steps, wf):
        if command == 'info':
            show_job_info(job)
            continue
        # the only other accepted command is 'repeat'
        try:
            job.data = {}  # discard data left from the previous parsing
            job.run()
            utils.comment("\n")
        except JobError as err:
            utils.put_error(err.msg, comment=err.note)
            sys.exit(1)
    return True
Ejemplo n.º 3
0
Archivo: main.py Proyecto: ccp4/dimple
def check_ccp4_envvars():
    """Verify that the CCP4 environment variables are usable.

    Exits with status 1 if $CCP4 or $CCP4_SCR is not set (the first missing
    one is reported). A $CCP4_SCR that is not a directory is only reported.
    """
    missing = [var for var in ("CCP4", "CCP4_SCR") if var not in os.environ]
    if missing:
        put_error('$%s not found, giving up' % missing[0])
        sys.exit(1)
    scratch_dir = os.environ["CCP4_SCR"]
    if not os.path.isdir(scratch_dir):
        put_error('No such directory: $CCP4_SCR, refmac shall not work!')
Ejemplo n.º 4
0
Archivo: pdb.py Proyecto: ccp4/dimple
def download_pdb(pdb_id, output_dir):
    """Fetch a PDB entry from RCSB into output_dir unless it is already there.

    The file is named ``<PDB_ID in upper case>.pdb``. If a file with that
    name already exists in output_dir it is reused and nothing is downloaded.
    On an HTTP error, or when the file cannot be saved, the error is reported
    with put_error() and the process exits with status 1.

    Returns the path of the (existing or newly written) file.
    """
    filename = pdb_id.upper()+'.pdb'
    path = os.path.join(output_dir, filename)
    if os.path.exists(path):
        comment('%s: using existing file %s\n' % (pdb_id, filename))
        return path

    comment('Downloading %s from RCSB...  ' % pdb_id)
    url = ('http://www.rcsb.org/pdb/download/downloadFile.do'
           '?fileFormat=pdb&compression=NO&structureId=' + pdb_id.upper())
    try:
        u = urllib2.urlopen(url)
    except urllib2.HTTPError as e:
        put_error(str(e))
        sys.exit(1)
    # always release the connection, also when read() fails
    try:
        content = u.read()
    finally:
        u.close()
    try:
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        with open(path, 'wb') as f:
            f.write(content)
        comment('done.\n')
    # os.makedirs raises OSError, which is not an IOError on Python 2
    except (IOError, OSError) as e:
        put_error('Failed to save downloaded file on disk',
                  comment=str(e))
        sys.exit(1)
    return path
Ejemplo n.º 5
0
 def __init__(self, output_dir, from_job=0):
     """Set up a workflow that works in output_dir.

     output_dir -- working directory; created if it does not exist
                   (the process exits with status 1 if that fails).
     from_job   -- when >= 1, jobs and file info are taken from the
                   pickle returned by self.load_pickle(), if it loads.

     Side effects: removes $GFORTRAN_UNBUFFERED_ALL from the environment
     when it is set to an enabling value, and sets $CCP_SUPPRESS_HTML.
     """
     self.output_dir = os.path.abspath(output_dir)
     self.jobs = []
     self.file_info = {}
     self.temporary_files = set()
     self.from_job = from_job  # skip jobs before from_job (for testing)
     if from_job >= 1:
         try:
             _pkl = self.load_pickle()
             self.repl_jobs = _pkl.jobs
             self.file_info = _pkl.file_info
         # best effort, but let KeyboardInterrupt/SystemExit through
         except Exception:
             self.repl_jobs = None
     self.dry_run = False
     self.enable_logs = True
     self.argv = sys.argv
     if not os.path.isdir(self.output_dir):
         try:
             os.makedirs(self.output_dir)
         except OSError as e:
             utils.put_error(e)
             sys.exit(1)
     # this can seriously affect Refmac compiled with GFortran
     bad_var = os.getenv('GFORTRAN_UNBUFFERED_ALL')
     if bad_var and bad_var[0] not in ('0', 'n', 'N'):
         utils.put_error(
             '$GFORTRAN_UNBUFFERED_ALL may terribly slow down Refmac',
             comment='It is unset internally in dimple.')
         del os.environ['GFORTRAN_UNBUFFERED_ALL']
     # avoid html-like crap in the output of CCP4 program
     os.environ['CCP_SUPPRESS_HTML'] = '1'
Ejemplo n.º 6
0
 def __init__(self, output_dir, from_job=0):
     """Set up a workflow that works in output_dir.

     output_dir -- working directory; created if it does not exist
                   (the process exits with status 1 if that fails).
     from_job   -- when >= 1, jobs and file info are taken from the
                   pickle returned by self.load_pickle(), if it loads.

     Side effects: removes $GFORTRAN_UNBUFFERED_ALL from the environment
     when it is set to an enabling value, and sets $CCP_SUPPRESS_HTML.
     """
     self.output_dir = os.path.abspath(output_dir)
     self.jobs = []
     self.file_info = {}
     self.temporary_files = set()
     self.from_job = from_job  # skip jobs before from_job (for testing)
     if from_job >= 1:
         try:
             _pkl = self.load_pickle()
             self.repl_jobs = _pkl.jobs
             self.file_info = _pkl.file_info
         # best effort, but let KeyboardInterrupt/SystemExit through
         except Exception:
             self.repl_jobs = None
     self.dry_run = False
     self.enable_logs = True
     self.argv = sys.argv
     if not os.path.isdir(self.output_dir):
         try:
             os.makedirs(self.output_dir)
         except OSError as e:
             utils.put_error(e)
             sys.exit(1)
     # this can seriously affect Refmac compiled with GFortran
     bad_var = os.getenv('GFORTRAN_UNBUFFERED_ALL')
     if bad_var and bad_var[0] not in ('0', 'n', 'N'):
         utils.put_error(
                 '$GFORTRAN_UNBUFFERED_ALL may terribly slow down Refmac',
                 comment='It is unset internally in dimple.')
         del os.environ['GFORTRAN_UNBUFFERED_ALL']
     # avoid html-like crap in the output of CCP4 program
     os.environ['CCP_SUPPRESS_HTML'] = '1'
Ejemplo n.º 7
0
def check_ccp4_envvars():
    """Verify that the CCP4 environment variables are usable.

    Exits with status 1 if $CCP4 or $CCP4_SCR is not set (the first missing
    one is reported). A $CCP4_SCR that is not a directory is only reported.
    """
    required = ('CCP4', 'CCP4_SCR')
    unset = [name for name in required if name not in os.environ]
    if unset:
        put_error('$%s not found, giving up' % unset[0])
        sys.exit(1)
    scratch_dir = os.environ['CCP4_SCR']
    if not os.path.isdir(scratch_dir):
        put_error('No such directory: $CCP4_SCR, refmac shall not work!')
Ejemplo n.º 8
0
def parse_workflow_commands():
    """Handle the 'info' and 'repeat' sub-commands given on the command line.

    Returns False when the first command-line argument is neither 'info'
    nor 'repeat' (nothing is done in that case), True otherwise.
    Exits with status 1 if a repeated job raises JobError.
    """
    prog = __package__ or os.path.basename(sys.argv[0])
    argv = sys.argv[1:]
    command = argv[0] if argv else None
    if command not in ('info', 'repeat'):
        return False
    if len(argv) == 1:
        sys.stderr.write("Specify output_dir.\n")
        return True

    # shortcut: "/my/path/05-cad.log" is interpreted as "/my/path" "05"
    log_path = argv[1]
    suffix = os.path.splitext(log_path)[1]
    if os.path.isfile(log_path) and suffix in ('.log', '.err'):
        directory, name = os.path.split(log_path)
        argv[1:2] = [directory, name.split('-')[0]]

    output_dir = argv[1]
    wf = open_pickled_workflow(output_dir)
    steps = argv[2:]
    if not steps:
        show_workflow_info(wf, dict(prog=prog, output_dir=output_dir))
        return True

    for job in parse_steps(steps, wf):
        if command == 'info':
            show_job_info(job)
            continue
        # the only other accepted command is 'repeat'
        try:
            job.data = {}  # discard data left from the previous parsing
            job.run()
            utils.comment("\n")
        except JobError as err:
            utils.put_error(err.msg, comment=err.note)
            sys.exit(1)
    return True
Ejemplo n.º 9
0
Archivo: main.py Proyecto: ccp4/dimple
def main(args):
    """Run dimple with the given command-line arguments.

    Returns None when a workflow sub-command was handled instead, otherwise
    the exit status: 0 on success, 1 after a JobError or RuntimeError.
    """
    if workflow.parse_workflow_commands():
        return

    options = parse_dimple_commands(args)
    check_ccp4_envvars()

    out_dir = options.output_dir
    wf = workflow.Workflow(out_dir, from_job=options.from_step)
    utils.start_log(os.path.join(out_dir, "dimple.log"), output_dir=out_dir)
    utils.log_value("version", __version__)
    utils.start_log_screen(os.path.join(out_dir, "screen.log"))

    exit_status = 1  # changed to 0 only when the whole run succeeds
    try:
        dimple(wf=wf, opt=options)
        check_contaminants_if_bad(wf, mtz=options.mtz)
        exit_status = 0
    except workflow.JobError as err:
        put_error(err.msg, comment=err.note)
        # the disk-space report can be interrupted with Ctrl-C
        try:
            utils.report_disk_space([wf.output_dir, os.getenv("CCP4_SCR")])
        except KeyboardInterrupt:
            comment("\nok, exiting...")
    except RuntimeError as err:
        put_error(err)
    finally:
        comment("\n")

    if options.cleanup:
        wf.delete_files(wf.temporary_files)
    # store the options together with the workflow
    wf.options = options
    wf.dump_pickle()
    return exit_status
Ejemplo n.º 10
0
 def delete_files(self, filenames):
     """Remove the given files, each resolved with self.path().

     Files that do not exist are skipped; a failure to remove a file
     is reported with utils.put_error() and does not stop the loop.
     """
     for f in filenames:
         path = self.path(f)
         if os.path.exists(path):
             try:
                 os.remove(path)
             # "as" form is valid on Python 2.6+ and Python 3
             except OSError as e:
                 utils.put_error(e)
Ejemplo n.º 11
0
 def delete_files(self, filenames):
     """Remove the given files, each resolved with self.path().

     Files that do not exist are skipped; a failure to remove a file
     is reported with utils.put_error() and does not stop the loop.
     """
     for f in filenames:
         path = self.path(f)
         if os.path.exists(path):
             try:
                 os.remove(path)
             # "as" form is valid on Python 2.6+ and Python 3
             except OSError as e:
                 utils.put_error(e)
Ejemplo n.º 12
0
Archivo: main.py Proyecto: ccp4/dimple
def _write_script(path, content, executable=False):
    """Write content to a text file at path, optionally making it executable.

    With executable=True an execute bit is added for everyone who has
    the read bit (like chmod +x under the current permissions).
    I/O and OS errors are reported with put_error(), not raised.
    """
    try:
        with open(path, 'w') as f:
            f.write(content)
        if executable:  # chmod +x
            mode = os.stat(path).st_mode
            # 0o444 (not 0444): octal syntax accepted by Python 2.6+ and 3
            os.chmod(path, mode | ((mode & 0o444) >> 2))
    except (IOError, OSError) as e:
        put_error(e)
Ejemplo n.º 13
0
def find_path():
    """Return the path of the coot launcher, or None if it is not found.

    On Windows only the default WinCoot location is checked and a missing
    file is reported; elsewhere the result of utils.syspath("coot") is
    returned.
    """
    if os.name != 'nt':
        return utils.syspath("coot")
    wincoot = "C:/WinCoot/runwincoot.bat"
    if os.path.exists(wincoot):
        return wincoot
    utils.put_error("WinCoot not found.")
Ejemplo n.º 14
0
def _write_script(path, content, executable=False):
    """Write content to a text file at path, optionally making it executable.

    With executable=True an execute bit is added for everyone who has
    the read bit (like chmod +x under the current permissions).
    I/O and OS errors are reported with put_error(), not raised.
    """
    try:
        with open(path, 'w') as out:
            out.write(content)
        if not executable:
            return
        # chmod +x: copy each read bit onto the matching execute bit
        perms = os.stat(path).st_mode
        exec_bits = (perms & 0o444) >> 2
        os.chmod(path, perms | exec_bits)
    except (IOError, OSError) as err:
        put_error(err)
Ejemplo n.º 15
0
 def check_col_type(self, label, expected_type):
     """Check that column `label` exists and has type `expected_type`.

     Returns True when the type matches; reports the mismatch and returns
     False otherwise. A missing column is reported and the process exits
     with status 1.
     """
     if label in self.columns:
         actual_type = self.columns[label]
         if actual_type == expected_type:
             return True
         put_error("Column '%s' in %s has type '%s' (expected '%s')" %
                   (label, self.filename, actual_type, expected_type))
         return False
     put_error("Column '%s' not found in %s" % (label, self.filename))
     sys.exit(1)
Ejemplo n.º 16
0
 def check_col_type(self, label, expected_type):
     """Check that column `label` exists and has type `expected_type`.

     Returns True when the type matches; reports the mismatch and returns
     False otherwise. A missing column is reported and the process exits
     with status 1.
     """
     if label in self.columns:
         actual_type = self.columns[label]
         if actual_type == expected_type:
             return True
         put_error("Column '%s' in %s has type '%s' (expected '%s')" %
                   (label, self.filename, actual_type, expected_type))
         return False
     put_error("Column '%s' not found in %s" % (label, self.filename))
     sys.exit(1)
Ejemplo n.º 17
0
def check_freerflags_column(free_mtz, expected_symmetry):
    """Return the name of the free-R flag column found in free_mtz.

    'FreeR_flag' is preferred over 'FREE'. A symmetry different from
    expected_symmetry only produces a warning. If neither column exists
    the error is reported and the process exits with status 1.
    """
    rfree_meta = read_metadata(free_mtz)
    if not match_symmetry(rfree_meta, expected_symmetry):
        comment("\nWARNING: R-free flag reference file is %s not %s." %
                (rfree_meta.symmetry, expected_symmetry.symmetry))
    candidates = ('FreeR_flag', 'FREE')
    found = next((c for c in candidates if c in rfree_meta.columns), None)
    if found is None:
        put_error("free-R column not found in %s" % free_mtz)
        sys.exit(1)
    # the result is ignored here: a wrong column type is only reported
    rfree_meta.check_col_type(found, 'I')
    return found
Ejemplo n.º 18
0
def special_mtz_mode(args):
    """Print the usage line and basic information about MTZ file args[0].

    Contaminant info is printed too when contaminants.get_info() returns
    something. IOError and RuntimeError are reported, not raised.
    """
    print('Usage: %s' % USAGE_SHORT)
    check_ccp4_envvars()
    wf = workflow.Workflow('')
    wf.enable_logs = False
    try:
        meta = wf.read_mtz_metadata(args[0])
        print('Basic MTZ file info:')
        print(meta.info())
        contaminant_note = contaminants.get_info(meta)
        if contaminant_note:
            print(contaminant_note)
    except (IOError, RuntimeError) as err:
        put_error(err)
Ejemplo n.º 19
0
Archivo: main.py Proyecto: ccp4/dimple
def special_mtz_mode(args):
    """Print the usage line and basic information about MTZ file args[0].

    Contaminant info is printed too when contaminants.get_info() returns
    something. IOError and RuntimeError are reported, not raised.
    """
    # print(x) with one argument behaves the same on Python 2 and 3
    print('Usage: %s' % USAGE_SHORT)
    check_ccp4_envvars()
    wf = workflow.Workflow('')
    wf.enable_logs = False
    try:
        mtz_meta = wf.read_mtz_metadata(args[0])
        print('Basic MTZ file info:')
        print(mtz_meta.info())
        contam_info = contaminants.get_info(mtz_meta)
        if contam_info:
            print(contam_info)
    except (IOError, RuntimeError) as e:
        put_error(e)
Ejemplo n.º 20
0
def check_freerflags_column(free_mtz, expected_symmetry, column):
    """Return the name of the free-R flag column to use from free_mtz.

    If `column` is given it must have type 'I', otherwise the process
    exits with status 1. If `column` is None, the first name from
    DEFAULT_FREE_COLS present in the file is returned; when none is
    present the error is reported and the process exits with status 1.
    A symmetry mismatch with a non-empty expected_symmetry only warns.
    """
    rfree_meta = read_metadata(free_mtz)
    symmetry_differs = (expected_symmetry and
                        not match_symmetry(rfree_meta, expected_symmetry))
    if symmetry_differs:
        comment("\nWARNING: R-free flag reference file is %s not %s." %
                (rfree_meta.symmetry, expected_symmetry.symmetry))

    if column is None:
        for candidate in DEFAULT_FREE_COLS:
            if candidate in rfree_meta.columns:
                # result ignored: a wrong type is only reported here
                rfree_meta.check_col_type(candidate, 'I')
                return candidate
        put_error("free-R column not found in %s" % free_mtz)
        sys.exit(1)

    # explicitly requested column: a wrong type is fatal
    if not rfree_meta.check_col_type(column, 'I'):
        sys.exit(1)
    return column
Ejemplo n.º 21
0
Archivo: mtz.py Proyecto: ccp4/dimple
def check_freerflags_column(free_mtz, expected_symmetry, column):
    """Return the name of the free-R flag column to use from free_mtz.

    If `column` is given it must have type 'I', otherwise the process
    exits with status 1. If `column` is None, the first name from
    DEFAULT_FREE_COLS present in the file is returned; when none is
    present the error is reported and the process exits with status 1.
    A symmetry mismatch only produces a warning; the comparison is skipped
    when expected_symmetry is empty/None.
    """
    rfree_meta = read_metadata(free_mtz)
    # without a reference symmetry there is nothing to compare, and the
    # warning below could not read expected_symmetry.symmetry anyway
    if expected_symmetry and not match_symmetry(rfree_meta, expected_symmetry):
        comment("\nWARNING: R-free flag reference file is %s not %s." %
                (rfree_meta.symmetry, expected_symmetry.symmetry))
    if column is not None:
        if not rfree_meta.check_col_type(column, 'I'):
            sys.exit(1)
        return column
    for name in DEFAULT_FREE_COLS:
        if name in rfree_meta.columns:
            # result ignored: a wrong type is only reported here
            rfree_meta.check_col_type(name, 'I')
            return name
    put_error("free-R column not found in %s" % free_mtz)
    sys.exit(1)
Ejemplo n.º 22
0
Archivo: main.py Proyecto: ccp4/dimple
def special_pdb_mode(args):
    """Print usage and run rwcontents on every PDB file listed in args.

    For each file the metadata is read, a summary line is written and an
    rwcontents job is run. IOError, RuntimeError and workflow.JobError are
    reported per file and do not stop the loop.
    """
    # print(x) with one argument behaves the same on Python 2 and 3
    print('Proper usage: %s' % USAGE_SHORT)
    check_ccp4_envvars()
    print('...actually we can run rwcontents for you')
    wf = workflow.Workflow('')
    wf.enable_logs = False
    for p in args:
        try:
            wf.read_pdb_metadata(p, print_errors=True)
            _comment_summary_line(os.path.basename(p), wf.file_info[p])
            wf.rwcontents(xyzin=p).run()
        except (IOError, RuntimeError) as e:
            put_error(e)
        except workflow.JobError as e:
            put_error(e.msg, comment=e.note)
    print('\n\n...but this is NOT how dimple is supposed to be run.')
Ejemplo n.º 23
0
def open_pickled_workflow(file_or_dir):
    """Load and return the pickled workflow object.

    file_or_dir is either the pickle file itself or a directory that
    contains PICKLE_FILENAME. If the file does not exist or cannot be
    unpickled, the error is reported and the process exits with status 1.
    """
    if os.path.isdir(file_or_dir):
        pkl = os.path.join(file_or_dir, PICKLE_FILENAME)
    else:
        pkl = file_or_dir
    if not os.path.exists(pkl):
        utils.put_error("workflow data file not found",
                        "No such file or directory: %s" % pkl)
        sys.exit(1)
    # NOTE(review): unpickling can execute arbitrary code - only open
    # workflow files from a trusted source.
    # "with" closes the file on return as well as on sys.exit().
    with open(pkl, "rb") as f:
        try:
            return pickle.load(f)
        except pickle.UnpicklingError:
            utils.put_error('"Unpickling" failed',
                            'Maybe this is not a pickle file: %s' % pkl)
            sys.exit(1)
Ejemplo n.º 24
0
def special_pdb_mode(args):
    """Print usage and run rwcontents on every PDB file listed in args.

    For each file the metadata is read, a summary line is written and an
    rwcontents job is run. IOError, RuntimeError and workflow.JobError are
    reported per file and do not stop the loop.
    """
    print('Proper usage: %s' % USAGE_SHORT)
    check_ccp4_envvars()
    print('...actually we can run rwcontents for you')
    wf = workflow.Workflow('')
    wf.enable_logs = False
    for pdb_path in args:
        try:
            wf.read_pdb_metadata(pdb_path, print_errors=True)
            label = os.path.basename(pdb_path)
            _comment_summary_line(label, wf.file_info[pdb_path])
            rwcontents_job = wf.rwcontents(xyzin=pdb_path)
            rwcontents_job.run()
        except (IOError, RuntimeError) as err:
            put_error(err)
        except workflow.JobError as err:
            put_error(err.msg, comment=err.note)
    print('\n\n...but this is NOT how dimple is supposed to be run.')
Ejemplo n.º 25
0
def open_pickled_workflow(file_or_dir):
    """Load and return the pickled workflow object.

    file_or_dir is either the pickle file itself or a directory that
    contains PICKLE_FILENAME. If the file does not exist or cannot be
    unpickled, the error is reported and the process exits with status 1.
    """
    if os.path.isdir(file_or_dir):
        pkl = os.path.join(file_or_dir, PICKLE_FILENAME)
    else:
        pkl = file_or_dir
    if not os.path.exists(pkl):
        utils.put_error("workflow data file not found",
                        "No such file or directory: %s" % pkl)
        sys.exit(1)
    # NOTE(review): unpickling can execute arbitrary code - only open
    # workflow files from a trusted source.
    # "with" closes the file on return as well as on sys.exit().
    with open(pkl, "rb") as f:
        try:
            return pickle.load(f)
        except pickle.UnpicklingError:
            utils.put_error('"Unpickling" failed',
                            'Maybe this is not a pickle file: %s' % pkl)
            sys.exit(1)
Ejemplo n.º 26
0
def read_metadata(pdb, print_errors):
    """Return PdbMeta built from the first CRYST1 line of a PDB file.

    pdb          -- path of the file; names ending with '.gz' are read
                    through gzip.
    print_errors -- when true, an empty file or a missing CRYST1 line is
                    reported with put_error().

    Returns None if no line starts with CRYST1.
    """
    if pdb.endswith('.gz'):
        f = gzip.open(pdb, 'rb')
    else:
        f = open(pdb)
    meta = None
    # try/finally: the file is closed even if PdbMeta() raises
    try:
        for line in f:
            # gzip yields bytes on Python 3; compare and parse as text
            if not isinstance(line, str):
                line = line.decode('ascii', 'replace')
            if line.startswith('CRYST1'):
                meta = PdbMeta(line)
                break
        if meta is None and print_errors:
            # position 0 after reading everything means nothing was read
            if f.tell() == 0:
                put_error('empty file: %s' % pdb)
            else:
                put_error('CRYST1 line not found in %s' % pdb)
    finally:
        f.close()
    return meta
Ejemplo n.º 27
0
Archivo: pdb.py Proyecto: ccp4/dimple
def read_metadata(pdb, print_errors):
    """Return PdbMeta built from the first CRYST1 line of a PDB file.

    pdb          -- path of the file; names ending with '.gz' are read
                    through gzip.
    print_errors -- when true, an empty file or a missing CRYST1 line is
                    reported with put_error().

    Returns None if no line starts with CRYST1.
    """
    if pdb.endswith('.gz'):
        f = gzip.open(pdb, 'rb')
    else:
        f = open(pdb)
    meta = None
    # try/finally: the file is closed even if PdbMeta() raises
    try:
        for line in f:
            # gzip yields bytes on Python 3; compare and parse as text
            if not isinstance(line, str):
                line = line.decode('ascii', 'replace')
            if line.startswith("CRYST1"):
                meta = PdbMeta(line)
                break
        if meta is None and print_errors:
            # position 0 after reading everything means nothing was read
            if f.tell() == 0:
                put_error("empty file: %s" % pdb)
            else:
                put_error("CRYST1 line not found in %s" % pdb)
    finally:
        f.close()
    return meta
Ejemplo n.º 28
0
def _find_i_sigi_columns(mtz_meta, opt):
    """Pick the intensity column and its sigma column.

    opt.icolumn, when set, is used as given (its type is checked against
    'J'). Otherwise the only column of type 'J' is used, or 'IMEAN' when
    there are several. If no choice can be made, the problem is reported
    and the process exits with status 1.

    Returns the tuple (icolumn, sigicolumn), where sigicolumn is
    opt.sigicolumn with '<ICOL>' replaced by the intensity column name.
    """
    import sys  # local import: needed only for the error exits below

    if opt.icolumn:
        icolumn = opt.icolumn
        mtz_meta.check_col_type(icolumn, 'J')
    else:
        j_columns = [k for k, v in mtz_meta.columns.items() if v == 'J']
        if len(j_columns) == 1:
            icolumn = j_columns[0]
        elif 'IMEAN' in j_columns:
            icolumn = 'IMEAN'
        elif len(j_columns) > 1:
            put_error('Multiple intensity columns: %s. '
                      'Pick one with  --icolumn' % j_columns)
            # without an intensity column the code below cannot continue
            sys.exit(1)
        else:
            put_error('No intensity (IMEAN) column in the MTZ file')
            sys.exit(1)

    # the default value of sigicolumn ('SIG<ICOL>') needs substitution
    sigicolumn = opt.sigicolumn.replace('<ICOL>', icolumn)
    mtz_meta.check_col_type(sigicolumn, 'Q')
    return (icolumn, sigicolumn)
Ejemplo n.º 29
0
Archivo: main.py Proyecto: ccp4/dimple
def _check_picture_tools():
    """Report problems with the tools used for making pictures.

    Checks the coot path and version text returned by
    coots.find_path_and_version() and the presence of "render".
    Every problem is reported with put_error(); returns True only when
    none was found.
    """
    all_fine = True
    coot_path, coot_ver = coots.find_path_and_version()
    if not coot_path:
        put_error("No coot, no pictures")
        all_fine = False
    elif coot_ver is None:
        put_error("coot not working(?), no pictures")
        all_fine = False
    else:
        # both version problems may be reported for the same coot
        if "with python" not in coot_ver:
            put_error("coot with Python support is needed")
            all_fine = False
        if "\n0.6." in coot_ver:
            put_error("coot 0.7+ is needed (0.6 would crash)")
            all_fine = False
    if not utils.syspath("render"):
        put_error("No Raster3d, no pictures")
        all_fine = False
    return all_fine
Ejemplo n.º 30
0
def main(args):
    """Run dimple with the given command-line arguments.

    Returns None when a workflow sub-command was handled instead, otherwise
    the exit status: 0 on success, 1 after a JobError or RuntimeError.
    Exits with status 1 if $CCP4 or $CCP4_SCR is not set.
    """
    if workflow.parse_workflow_commands():
        return

    options = parse_dimple_commands(args)

    unset = [var for var in ("CCP4", "CCP4_SCR") if var not in os.environ]
    if unset:
        put_error('$%s not found, giving up' % unset[0])
        sys.exit(1)
    scratch_dir = os.environ["CCP4_SCR"]
    if not os.path.isdir(scratch_dir):
        put_error('No such directory: $CCP4_SCR, refmac shall not work!')

    out_dir = options.output_dir
    wf = workflow.Workflow(out_dir, from_job=options.from_step)
    utils.start_log(os.path.join(out_dir, "dimple.log"), output_dir=out_dir)
    utils.log_value("version", __version__)
    utils.start_log_screen(os.path.join(out_dir, "screen.log"))

    exit_status = 1  # changed to 0 only when the whole run succeeds
    try:
        dimple(wf=wf, opt=options)
        exit_status = 0
    except workflow.JobError as err:
        put_error(err.msg, comment=err.note)
        # the disk-space report can be interrupted with Ctrl-C
        try:
            utils.report_disk_space([wf.output_dir, os.getenv("CCP4_SCR")])
        except KeyboardInterrupt:
            comment("\nok, exiting...")
    except RuntimeError as err:
        put_error(err)
    finally:
        comment("\n")

    if options.cleanup:
        wf.delete_files(wf.temporary_files)
    # store the options together with the workflow
    wf.options = options
    wf.dump_pickle()
    return exit_status
Ejemplo n.º 31
0
def main(args):
    """Run dimple with the given command-line arguments.

    Returns None when a workflow sub-command was handled instead, otherwise
    the exit status: 0 on success, 1 after a JobError, RuntimeError,
    IOError or OSError, or when the workflow pickle cannot be written.
    """
    if workflow.parse_workflow_commands():
        return

    options = parse_dimple_commands(args)
    check_ccp4_envvars()
    # stays None if the Workflow constructor itself raises a caught error
    wf = None
    try:
        wf = workflow.Workflow(options.output_dir, from_job=options.from_step)
        utils.start_log(os.path.join(options.output_dir, 'dimple.log'),
                        output_dir=options.output_dir)
        utils.log_value('version', __version__)
        utils.start_log_screen(os.path.join(options.output_dir, 'screen.log'))

        dimple(wf=wf, opt=options)
        check_contaminants_if_bad(wf, mtz=options.mtz)
        exit_status = 0
    except workflow.JobError as e:
        put_error(e.msg, comment=e.note)
        # the disk-space report can be interrupted with Ctrl-C
        try:
            utils.report_disk_space([options.output_dir if wf is None
                                     else wf.output_dir,
                                     os.getenv('CCP4_SCR')])
        except KeyboardInterrupt:
            comment('\nok, exiting...')
        exit_status = 1
    except (RuntimeError, IOError, OSError) as e:
        put_error(e)
        exit_status = 1
    finally:
        comment('\n')
    if wf is None:
        # no workflow was created: nothing to clean up or to pickle
        return exit_status
    if options.cleanup:
        wf.delete_files(wf.temporary_files)
    wf.options = options
    try:
        wf.dump_pickle()
    except IOError as e:
        put_error(e)
        exit_status = 1
    return exit_status
Ejemplo n.º 32
0
def _generate_scripts_and_pictures(wf, opt, data):
    """Write coot helper scripts into wf.output_dir and optionally render
    pictures of the first (at most two) blobs.

    wf   -- workflow object; its output_dir, coot_py(), render_r3d() and
            delete_files() are used.
    opt  -- options; xyzout, hklout, white_bg and img_format are read.
            img_format is reset to None here when coot or "render"
            turns out to be unusable.
    data -- mapping with "blobs" and "center" entries, or a false value
            when there is no blob data.

    Files written: run-coot.py, blob1-coot.py/blob2-coot.py (one per blob,
    at most two) and coot.sh. Returns the path of coot.sh.
    A workflow.JobError from the coot job is re-raised.
    """
    blobs = data["blobs"] if data else []
    coot_path = coots.find_path()
    if not blobs:
        comment("\nUnmodelled blobs not found.")
    elif opt.img_format:
        # pictures were requested: check the tools, give up on pictures
        # (but not on scripts) if something is missing
        if coot_path:
            coot_ver = coots.find_version(coot_path)
            if coot_ver is None:
                put_error("coot not working(?), no pictures")
                opt.img_format = None
            elif "with python" not in coot_ver:
                put_error("coot with Python support is needed")
                opt.img_format = None
        else:
            put_error("No coot, no pictures")
            opt.img_format = None
        if not utils.syspath("render"):
            put_error("No Raster3d, no pictures")
            opt.img_format = None
        if opt.img_format:
            if len(blobs) == 1:
                comment("\nRendering density blob at (%.1f, %.1f, %.1f)" %
                        blobs[0])
            else:
                # the two coordinate tuples are concatenated to fill
                # the six placeholders
                comment("\nRendering 2 largest blobs: at (%.1f, %.1f, %.1f) "
                        "and at (%.1f, %.1f, %.1f)" % (blobs[0]+blobs[1]))
    # a false `data` is passed on as-is instead of a center
    com = data and data["center"]

    # run-coot.py centers on the biggest blob. It uses relative paths -
    # it can be run only from the output directory, but is not affected
    # by moving that directory to different location.
    # There are blobN-coot.py scripts generated below with absolute paths.
    # write coot script (apart from pictures) that centers on the biggest blob
    script_path = os.path.join(wf.output_dir, "run-coot.py")
    script = coots.basic_script(pdb=opt.xyzout, mtz=opt.hklout,
                                center=(blobs and blobs[0]), toward=com,
                                white_bg=opt.white_bg)
    _write_script(script_path, script, executable=True)

    # blob images, for now for not more than two blobs
    d = os.path.abspath(wf.output_dir)
    for n, b in enumerate(blobs[:2]):
        py_path = os.path.join(wf.output_dir, "blob%d-coot.py" % (n+1))
        content = coots.basic_script(pdb=os.path.join(d, opt.xyzout),
                                     mtz=os.path.join(d, opt.hklout),
                                     center=blobs[n], toward=com,
                                     white_bg=opt.white_bg)
        _write_script(py_path, content)
    # coot.sh - one-line script for convenience
    if blobs:
        coot_sh_text = '{coot} --no-guano {out}/blob1-coot.py\n'
    else:
        coot_sh_text = '{coot} --no-guano {out}/final.mtz {out}/final.pdb\n'
    coot_sh_path = os.path.join(wf.output_dir, "coot.sh")
    # fall back to plain 'coot' when no coot path was found
    _write_script(coot_sh_path, coot_sh_text.format(coot=coot_path or 'coot',
                                                    out=wf.output_dir),
                  executable=True)

    if opt.img_format and blobs:
        script = ''
        basenames = []
        # as a workaround for buggy coot the maps are reloaded for each blob
        for n, b in enumerate(blobs[:2]):
            script += coots.basic_script(pdb=opt.xyzout, mtz=opt.hklout,
                                         center=b, toward=com,
                                         white_bg=opt.white_bg)
            rs, names = coots.r3d_script(center=b, toward=com,
                                         blobname="blob%s" % (n+1))
            script += rs
            basenames += names
        coot_job = wf.coot_py(script)
        try:
            coot_job.run()
        except workflow.JobError:
            # check for a possible cause to hint the user
            # (possible workaround: change $HOME to non-existing directory)
            if utils.silently_run(coot_job.args, cwd=wf.output_dir)[0] != 0:
                put_error("coot fails with options: --no-graphics --python",
                          comment="It happens when scripts in .coot or "
                                  ".coot-preferences are not compatible\n"
                                  "with the --no-graphics mode.")
            raise
        for n, basename in enumerate(basenames):
            job = wf.render_r3d(basename, img_format=opt.img_format)
            # every third render job runs with the default output
            if n % 3 == 0:
                job.run()
            else: # minimal output
                job.run(show_progress=False, new_line=False)
        # the .r3d inputs are not needed once the images are rendered
        wf.delete_files([name+".r3d" for name in basenames])
    return coot_sh_path
Ejemplo n.º 33
0
def dimple(wf, opt):
    """Run the complete pipeline described by ``opt`` using workflow ``wf``.

    The stages, in the order they appear below:

    1. read metadata of the data file (``opt.mtz``) and of the models
       (``opt.pdbs``); the model closest to the data is used,
    2. optionally remove HETATM atoms and run rwcontents,
    3. try reindexing of the data with pointless,
    4. convert intensities to amplitudes with truncate/ctruncate if needed,
    5. rigid-body refinement with refmac5,
    6. molecular replacement (phaser or molrep) when rigid-body refinement
       was skipped, failed, or gave R above ``opt.mr_when_r``,
    7. take free-R flags from a reference file or generate them, and merge
       them with the data using cad,
    8. optional jelly-body refinement followed by restrained refinement,
    9. blob search and generation of Coot scripts and pictures.

    Args:
        wf: workflow object. Programs are run through its methods; the code
            relies on ``wf.jobs[-1]`` being the job that has just been run.
        opt: options namespace (presumably the one returned by
            parse_dimple_commands). ``opt.pdbs``, ``opt.fcolumn`` and
            ``opt.sigfcolumn`` are modified in place.

    Returns:
        None. The function returns early when no usable pdb file is left,
        when MR would be needed but is disabled (``opt.mr_when_r >= 1``),
        or when ``_after_phaser_comments`` reports a failed phaser run.
    """
    comment("     ### Dimple v%s. Problems and suggestions:"
            " ccp4.github.io/dimple ###" % __version__)
    mtz_meta = wf.read_mtz_metadata(opt.mtz)
    _comment_summary_line("MTZ (%.1fA)" % mtz_meta.dmax, mtz_meta)
    if opt.dls_naming:
        opt.pdbs = dls_name_filter(opt.pdbs)
    opt.pdbs = utils.filter_out_duplicate_files(opt.pdbs, relto=opt.output_dir)
    if not opt.pdbs:
        comment("\nNo non-empty pdb files given. Nothing to do.")
        return
    for p in opt.pdbs:
        wf.read_pdb_metadata(p, print_errors=(len(opt.pdbs) > 1))
    if len(opt.pdbs) > 1:
        comment("\nPDBs in order of similarity (using the first one):")
        # after sorting, opt.pdbs[0] is the model with the smallest
        # calculate_difference() to the data; only that one is used below
        opt.pdbs.sort(key=lambda x: calculate_difference(wf.file_info[x],
                                                         mtz_meta))
    utils.log_value("data_file", opt.mtz)
    utils.log_value("pdb_files", opt.pdbs)
    for p in opt.pdbs:
        _comment_summary_line(os.path.basename(p), wf.file_info[p])
    ini_pdb = "ini.pdb"
    wf.copy_uncompressed(opt.pdbs[0], ini_pdb)
    pdb_meta = wf.file_info[opt.pdbs[0]]
    # pdb_meta is None when the metadata could not be read; several branches
    # below test for it and go straight to MR
    if pdb_meta is None:
        put_error("PDB file missing CRYST1 record, starting from MR")
    if opt.no_hetatm or check_hetatm_x(wf.path(ini_pdb), pdb_meta):
        if not opt.no_hetatm:
            comment("\nHETATM marked as element X would choke many programs.")
        rb_xyzin = "prepared.pdb"
        wf.temporary_files.add(rb_xyzin)
        n_het = wf.remove_hetatm(xyzin=ini_pdb, xyzout=rb_xyzin,
                                 remove_all=opt.no_hetatm)
        comment("\nRemoved %d HETATM atoms" % n_het)
    else:
        rb_xyzin = ini_pdb
    # run rwcontents even without CRYST1 - it will show mol. weight only
    wf.rwcontents(xyzin=rb_xyzin).run()
    # data parsed from the rwcontents job; used later for water/aa counts,
    # molecular weight and the number of molecules
    rw_data = wf.jobs[-1].data
    if pdb_meta is None:
        pass # we already had a warning message
    elif rw_data.get('solvent_percent') is None:
        put_error("rwcontents could not interpret %s" % rb_xyzin)
    elif rw_data['solvent_percent'] > HIGH_SOLVENT_PCT:
        comment("\nHmm... %.1f%% of solvent or incomplete model" %
                rw_data['solvent_percent'])
        # wf.jobs[-1].data is the same object as rw_data here
        if abs(wf.jobs[-1].data.get('volume', 0) - pdb_meta.get_volume()) > 10:
            comment("\ndebug: problem when calculating volume?")

    ####### pointless - reindexing #######
    # reindexing is attempted only if symmetry matches, MR is not forced
    # (mr_when_r > 0) and the cell volumes differ by less than ~40%
    if match_symmetry(mtz_meta, pdb_meta) and opt.mr_when_r > 0 and (
            0.7 < mtz_meta.get_volume() / pdb_meta.get_volume() < 1.4):
        reindexed_mtz = "pointless.mtz"
        wf.temporary_files.add(reindexed_mtz)
        wf.pointless(hklin=opt.mtz, xyzin=rb_xyzin, hklout=reindexed_mtz,
                     keys="TOLERANCE 5").run(may_fail=True)
        alt_reindex = wf.jobs[-1].data.get('alt_reindex')
        if wf.jobs[-1].exit_status == 0 and alt_reindex:
            for ar in alt_reindex:
                comment("\n    %-10s CC: %-8.3f cell diff: %.1fA" % (
                        ar['op'], ar['cc'], ar['cell_deviat']))
        else:
            # until recently (2015) pointless didn't print CC for non-ambiguous
            # spacegroups (e.g. C2), but now it always prints
            comment("\n    no good indexing")
            # fall back to the original data file
            reindexed_mtz = opt.mtz
    else:
        reindexed_mtz = opt.mtz
    reindexed_mtz_meta = wf.read_mtz_metadata(reindexed_mtz)
    if reindexed_mtz_meta.symmetry != mtz_meta.symmetry:
        _comment_summary_line('reindexed MTZ', reindexed_mtz_meta)

    ####### (c)truncate - calculate amplitudes if needed #######
    if not opt.fcolumn:
        opt.fcolumn = 'F' if 'F' in mtz_meta.columns else 'FP'
    elif opt.icolumn or opt.ItoF_prog:
        put_error('Ignoring options --fcolumn/--sigfcolumn')
    # the default of --sigfcolumn contains the placeholder <FCOL>
    opt.sigfcolumn = opt.sigfcolumn.replace('<FCOL>', opt.fcolumn)
    # amplitudes are (re)calculated when intensities were requested
    # explicitly or when the F/SIGF columns are absent from the data file
    if (opt.ItoF_prog or opt.icolumn or opt.fcolumn not in mtz_meta.columns
                                  or opt.sigfcolumn not in mtz_meta.columns):
        f_mtz = "amplit.mtz"
        wf.temporary_files.add(f_mtz)
        i_sigi_cols = _find_i_sigi_columns(mtz_meta, opt)
        # ctruncate when asked for, or in slow mode if no program was chosen
        if opt.ItoF_prog == 'ctruncate' or (opt.ItoF_prog is None and opt.slow):
            wf.ctruncate(hklin=reindexed_mtz, hklout=f_mtz,
                         colin="/*/*/[%s,%s]" % i_sigi_cols).run()
        else:
            wf.truncate(hklin=reindexed_mtz, hklout=f_mtz,
                        labin="IMEAN=%s SIGIMEAN=%s" % i_sigi_cols,
                        labout="F=F SIGF=SIGF").run()
        # from now on use the column names written by (c)truncate
        opt.fcolumn = 'F'
        opt.sigfcolumn = 'SIGF'
    else:
        f_mtz = reindexed_mtz

    ####### rigid body - check if model is good for refinement? #######
    refmac_labin_nofree = "FP=%s SIGFP=%s" % (opt.fcolumn, opt.sigfcolumn)
    # refmac_xyzin stays None if the model is not good enough for direct
    # refinement; this triggers the MR section below
    refmac_xyzin = None
    cell_diff = calculate_difference(pdb_meta, reindexed_mtz_meta)
    if pdb_meta is None:
        pass # the error message was already printed
    elif opt.mr_when_r <= 0:
        comment("\nMR requested unconditionally.")
    elif cell_diff > 0.1 and opt.mr_when_r < 1:
        comment("\nDifferent unit cells.")
    elif pdb_meta.symmetry != reindexed_mtz_meta.symmetry:
        comment("\nDifferent space groups.")
    else:
        comment("\nRigid-body refinement with resolution 3.5 A, 10 cycles.")
        if 'aa_count' in rw_data and 'water_count' in rw_data:
            if rw_data['aa_count'] != 0:
                comment(" %.1f waters/aa." % (rw_data['water_count'] /
                                              rw_data['aa_count']))
            else:
                comment(' %d/0 waters/aa.' % rw_data['water_count'])
        wf.temporary_files |= {"refmacRB.pdb", "refmacRB.mtz"}
        # it may fail because of "Disagreement between mtz and pdb"
        wf.refmac5(hklin=f_mtz, xyzin=rb_xyzin,
                   hklout="refmacRB.mtz", xyzout="refmacRB.pdb",
                   labin=refmac_labin_nofree,
                   libin=None,
                   keys="""refinement type rigidbody resolution 15 3.5
                           rigidbody ncycle 10""").run(may_fail=True)
        # if the error is caused by mtz/pdb disagreement, continue with MR
        if wf.jobs[-1].exit_status != 0:
            comment("\nTry MR.")
        elif not wf.jobs[-1].data.get("overall_r"):
            comment("\nWARNING: unknown R factor, something went wrong.\n")
            refmac_xyzin = "refmacRB.pdb"
        elif wf.jobs[-1].data["overall_r"] > opt.mr_when_r:
            comment("\nRun MR for R > %g." % opt.mr_when_r)
        else:
            comment("\nNo MR for R < %g." % opt.mr_when_r)
            refmac_xyzin = "refmacRB.pdb"

    ####### phaser/molrep - molecular replacement #######
    if refmac_xyzin is None:
        vol_ratio = None
        if pdb_meta:
            # num_mol accounts for strict NCS (MTRIX without iGiven)
            # NOTE(review): rw_data['num_mol'] is indexed directly; confirm
            # that rwcontents always provides it when pdb_meta is available
            vol_ratio = (mtz_meta.asu_volume() /
                         pdb_meta.asu_volume(rw_data['num_mol']))
            comment(" Volume of asu: %.1f%% of model asu." % (100 * vol_ratio))
        if opt.mr_when_r >= 1:
            comment("\nWould try MR, but it is disabled.")
            return
        if opt.mr_num:
            mr_num = opt.mr_num
        else:
            mr_num = guess_number_of_molecules(mtz_meta, rw_data, vol_ratio)
        mw = rw_data.get('weight')
        # a float mr_num is handled as a multiplier: the model is split with
        # ensembler and the number of copies to search for is
        # mr_num * (number of ensembler models), rounded, but at least 1
        if isinstance(mr_num, float):
            wf.ensembler(pdbin=rb_xyzin, root='ens').run()
            n_models = len(wf.jobs[-1].data['models'])
            mw = None
            rb_xyzin = "ens_merged.pdb"
            mr_num = max(int(round(mr_num * n_models)), 1)
        # phaser is used by default if number of searched molecules is known
        if opt.mr_prog == 'molrep':
            wf.temporary_files |= {"molrep.pdb", "molrep_dimer.pdb",
                                   "molrep.crd"}
            wf.molrep(f=f_mtz, m=rb_xyzin).run()
            refmac_xyzin = "molrep.pdb"
        else:
            wf.temporary_files |= {"phaser.1.pdb", "phaser.1.mtz"}
            wf.phaser_auto(hklin=f_mtz,
                           labin="F=%s SIGF=%s" % (opt.fcolumn, opt.sigfcolumn),
                           model=dict(pdb=rb_xyzin, identity=100, num=mr_num,
                                      mw=mw),
                           sg_alt="ALL", opt=opt,
                           root='phaser').run(may_fail=True)
            # a false value from _after_phaser_comments ends the pipeline
            if not _after_phaser_comments(wf.jobs[-1],
                                          sg_in=reindexed_mtz_meta.symmetry):
                return
            # phaser writes both the model and the data file used from now on
            refmac_xyzin = "phaser.1.pdb"
            f_mtz = "phaser.1.mtz"

    # disabled code path (never executed), kept as it was in the source
    if False:
        wf.findwaters(pdbin=refmac_xyzin, hklin=f_mtz,
                      f="FC", phi="PHIC", pdbout="prepared_wat.pdb", sigma=2)
        refmac_xyzin = "prepared_wat.pdb"

    ####### adding free-R flags #######
    f_mtz_meta = wf.read_mtz_metadata(f_mtz)
    # resolution limit passed to cad (and to unique when flags are not
    # repeatable): user-given --reso or slightly below dmax of the data
    cad_reso = opt.reso or (f_mtz_meta.dmax - MtzMeta.d_eps)
    if opt.free_r_flags:
        free_mtz = opt.free_r_flags
        # NOTE(review): pdb_meta may be None here (missing CRYST1 followed
        # by MR); confirm check_freerflags_column accepts that
        free_col = check_freerflags_column(wf.path(free_mtz),
                                           expected_symmetry=pdb_meta,
                                           column=opt.freecolumn)
        comment("\nFree-R flags from the %s file, column %s." %
                (("reference" if free_mtz != opt.mtz else 'input'), free_col))
    else:
        free_col = DEFAULT_FREE_COLS[0]
        if free_col in f_mtz_meta.columns:
            comment("\nReplace free-R flags")
        else:
            comment("\nGenerate free-R flags")
        free_mtz = "free.mtz"
        wf.temporary_files |= {"unique.mtz", free_mtz}
        if opt.seed_freerflag or cell_diff > 1e3: # i.e. different SG
            wf.unique(hklout="unique.mtz", ref=f_mtz_meta,
                      resolution=cad_reso).run()
        else:
            comment(" (repeatably)")
            # Here we'd like to have always the same set of free-r flags
            # for given PDB file. That's why we don't use information
            # from the data file (mtz).
            wf.unique(hklout="unique.mtz", ref=pdb_meta, resolution=1.0).run()
        # CCP4 freerflag uses always the same pseudo-random sequence by default
        wf.freerflag(hklin="unique.mtz", hklout=free_mtz,
                     keys=("SEED" if opt.seed_freerflag else "")).run()

    # cad is skipped only when the flags are already in the input data file
    # and no resolution cut-off was requested
    if free_mtz == opt.mtz and opt.reso is None:
        prepared_mtz = f_mtz
    else:
        prepared_mtz = "prepared.mtz"
        wf.temporary_files.add(prepared_mtz)
        # take all columns except free_col from the data file and free_col
        # from the file with flags
        wf.cad(data_in=[(f_mtz,
                         [c for c in f_mtz_meta.columns if c != free_col]),
                        (free_mtz, [free_col])],
               hklout=prepared_mtz,
               keys=["sysab_keep",  # does it matter?
                     "reso overall 1000.0 %g" % cad_reso]).run()
    # reflections without a flag get one in a second freerflag run
    freerflag_missing = wf.count_mtz_missing(prepared_mtz, free_col)
    if freerflag_missing:
        wf.freerflag(hklin=prepared_mtz, hklout="prepared2.mtz",
                     keys="COMPLETE FREE="+free_col,
                     parser=" (again, for %d refl. more)" % freerflag_missing
                    ).run()
        prepared_mtz = "prepared2.mtz"
        wf.temporary_files.add(prepared_mtz)

    ####### refinement #######
    if opt.weight:
        refmac_weight = "matrix %f" % opt.weight
    else:
        refmac_weight = "auto"
    # keywords shared by the jelly-body and the final restrained refinement
    restr_ref_keys = """\
     make newligand continue
     refinement type restrained
     weight %s
     """ % refmac_weight
    if opt.freecolumn_val:
        restr_ref_keys += "free %s\n" % opt.freecolumn_val
    refmac_labin = "%s FREE=%s" % (refmac_labin_nofree, free_col)
    comment("\nRestrained refinement, %d+%d cycles." % (opt.jelly,
                                                        opt.restr_cycles))
    if opt.jelly:
        wf.temporary_files |= {"jelly.pdb", "jelly.mtz"}
        # note: % binds tighter than +, so only the adjacent string literals
        # are formatted with opt.jelly before the concatenation
        wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                   hklout="jelly.mtz", xyzout="jelly.pdb",
                   labin=refmac_labin, libin=opt.libin,
                   keys=restr_ref_keys+"ridge distance sigma 0.01\n"
                                       "make hydrogen no\n"
                                       "ncycle %d" % opt.jelly
                                      +opt.extra_ref_keys).run()
        comment(_refmac_rms_line(wf.jobs[-1].data))
        refmac_xyzin = "jelly.pdb"
    # final refinement; its result is used below as the job object
    restr_job = wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                 hklout=opt.hklout, xyzout=opt.xyzout,
                 labin=refmac_labin, libin=opt.libin,
                 keys=restr_ref_keys+("ncycle %d" % opt.restr_cycles)
                                    +opt.extra_ref_keys).run()
    comment(_refmac_rms_line(restr_job.data))
    # if that run is repeated with --from-step it's useful to compare Rfree
    if wf.from_job > 0 and wf.from_job <= len(wf.jobs): # from_job is 1-based
        prev = [j for j in wf.repl_jobs if j.name == restr_job.name]
        # NOTE(review): only "free_r" is checked in the previous data, but
        # "overall_r" is read as well - confirm they always come together
        if prev and prev[0].data and "free_r" in prev[0].data:
            comment("\nPreviously:  R/Rfree %.4f/%.4f  Rfree change: %+.4f" % (
                    prev[0].data["overall_r"], prev[0].data["free_r"],
                    restr_job.data["free_r"] - prev[0].data["free_r"]))

    ####### check blobs and finish #######
    # blobs are searched for only if the final Rfree is acceptable;
    # the scripts are generated in both cases
    if restr_job.data["free_r"] <= BAD_FINAL_RFREE:
        fb_job = wf.find_blobs(opt.hklout, opt.xyzout, sigma=0.8).run()
        coot_script = _generate_scripts_and_pictures(wf, opt, fb_job.data)
        if coot_script:
            comment("\nTo see it in Coot run %s" % coot_script)
    else:
        comment("\nGiving up (Rfree > %g). No blob search." % BAD_FINAL_RFREE)
        _generate_scripts_and_pictures(wf, opt, None)
Ejemplo n.º 34
0
Archivo: main.py Proyecto: ccp4/dimple
def parse_dimple_commands(args):
    """Parse dimple's command line and return the validated options.

    The normal invocation consists of one mtz file, one or more pdb files
    (or PDB IDs, which are downloaded into the output directory) and the
    output directory given as the last positional argument. A few special
    modes are also handled here: the ccp4i-compatible keyword form
    (HKLIN/XYZIN/HKLOUT/XYZOUT), checking of pdb files only, checking of
    a single mtz file, and 'rerun' that takes the input files from
    an existing dimple.log.

    Args:
        args: list of command-line arguments without the program name.

    Returns:
        The argparse namespace, extended with the attributes output_dir,
        mtz, pdbs and freecolumn_val. File paths are adjusted to be usable
        from the output directory and the slow-dependent defaults
        (restr_cycles, jelly) are filled in.

    Raises:
        SystemExit: after the special pdb/mtz modes and on any invalid
            combination of arguments (an error message is printed first).
    """
    dstr = ' (default: %(default)s)'
    parser = argparse.ArgumentParser(
                usage=USAGE_SHORT, epilog=workflow.commands_help, prog=PROG,
                formatter_class=argparse.RawDescriptionHelpFormatter)
    # positional args can be separated by options, but not after the 3rd one
    # see http://bugs.python.org/issue15112 , http://bugs.python.org/issue14191
    parser.add_argument('pos_arg1')
    parser.add_argument('pos_arg2')
    parser.add_argument('pos_arg3')
    parser.add_argument('more_args', nargs='*')
    group1 = parser.add_argument_group('most commonly used options')
    group1.add_argument('-s', '--slow', action='count',
                        help='more refinement, etc. (can be used 2x)')
    group1.add_argument('-M', '--mr-when-r', type=float, default=0.4,
                        metavar='NUM',
                        help='threshold for Molecular Replacement'+dstr)
    group2 = parser.add_argument_group('options controlling input/output')
    group2.add_argument('-I', '--icolumn', metavar='ICOL',
                        help='I column label (default: IMEAN)')
    group2.add_argument('--sigicolumn', metavar='SIGICOL', default='SIG<ICOL>',
                        help='SIGI column label'+dstr)
    group2.add_argument('--fcolumn', metavar='FCOL',
                        help='F column label (default: F)')
    group2.add_argument('--sigfcolumn', metavar='SIGFCOL', default='SIG<FCOL>',
                        help='SIGF column label'+dstr)
    group2.add_argument('--libin', metavar='CIF',
                        help='ligand descriptions for refmac (LIBIN)')
    group2.add_argument('-R', '--free-r-flags', metavar='MTZ_FILE',
                        help='file with freeR flags '
                             '("-" = use flags from data mtz)')
    group2.add_argument('--freecolumn', metavar='COL[=N]',
                        help='Rfree column with optional value (default: 0)')
    group2.add_argument('--hklout', metavar='out.mtz', default='final.mtz',
                        help='output mtz file'+dstr)
    group2.add_argument('--xyzout', metavar='out.pdb', default='final.pdb',
                        help='output pdb file'+dstr)
    group2.add_argument('-f', choices=['png', 'jpeg', 'tiff', 'none'],
                        dest='img_format',
                        help='format of generated images'+dstr)
    # --no-cleanup is registered first, so its implicit default (True)
    # is the default value of opt.cleanup
    group2.add_argument('--no-cleanup', dest='cleanup', action='store_false',
                        help='leave intermediate files')
    group2.add_argument('--cleanup', action='store_true',
                        help=argparse.SUPPRESS)  # obsolete

    group3 = parser.add_argument_group('options customizing the run')
    group3.add_argument('--no-hetatm', action='store_true',
                        help='remove HETATM atoms from the given model')
    group3.add_argument('--jelly', metavar='N_ITER', type=int,
                    help='run refmac jelly-body before the final refinement')
    group3.add_argument('--reso', type=float, help='limit the resolution [A]')
    group3.add_argument('--restr-cycles', metavar='N', type=int,
                        help='cycles of refmac final refinement (default: 8)')
    group3.add_argument('--weight', metavar='VALUE', type=float,
                        help='refmac matrix weight (default: auto-weight)')

    group3.add_argument('--mr-prog', choices=['phaser', 'molrep'],
                        default='phaser',
                        help='Molecular Replacement program' + dstr)
    group3.add_argument('--mr-num', type=int,
                        help='number of molecules for MR (default: auto)')
    group3.add_argument('--mr-reso', type=float, default=3.25,
                        help='high resolution for MR '
                             '(if >10 interpreted as eLLG)' + dstr)
    group3.add_argument('--ItoF-prog', choices=['truncate', 'ctruncate'],
                        help='program to calculate amplitudes')
    group3.add_argument('--seed-freerflag', action='store_true',
                        help=argparse.SUPPRESS)
    group3.add_argument('--dls-naming', action='store_true',
                        help=argparse.SUPPRESS)
    group3.add_argument('--from-step', metavar='N', type=int, default=0,
                        help=argparse.SUPPRESS)
    parser.add_argument('--version', action='version',
                        version='%(prog)s '+__version__)
    # customize usage message: get rid of 'positional arguments',
    # rename default 'optional arguments' and shift it to the end.
    # pylint: disable=protected-access
    default_group = parser._action_groups[1]
    default_group.title = 'other options'
    parser._action_groups = parser._action_groups[2:] + [default_group]

    # special mode for compatibility with ccp4i
    # HKLIN/XYZIN keywords are dropped (their values become positional
    # arguments), HKLOUT/XYZOUT are translated to the corresponding options
    legacy_args = {"HKLIN": "", "XYZIN": "",
                   "HKLOUT": "--hklout", "XYZOUT": "--xyzout"}
    if len(args) == 8 and args[0] in legacy_args:
        args = [legacy_args.get(a) or a
                for a in args if legacy_args.get(a) != ""]
        output_dir = os.path.join(os.environ.get("CCP4_SCR", ''), "dimple_out")
        args.append(output_dir)

    # special mode for checking pdb file[s]
    if len(args) >= 1 and all(arg.endswith('.pdb') for arg in args):
        special_pdb_mode(args)
        sys.exit(0)

    # special mode for checking mtz file
    # NOTE(review): exits with status 1 while the pdb mode above exits
    # with 0 - left unchanged, confirm that it is intended
    if len(args) == 1 and args[0].endswith('.mtz'):
        special_mtz_mode(args)
        sys.exit(1)

    opt = parser.parse_args(args)
    all_args = [opt.pos_arg1, opt.pos_arg2, opt.pos_arg3] + opt.more_args
    # all_args should be one mtz, one or more pdbs and output_dir
    opt.output_dir = all_args.pop()
    if opt.img_format == 'none':  # this option is kept for compatibility only
        opt.img_format = None
    if opt.output_dir.endswith(('.mtz', '.pdb', '.gz')):
        put_error('The last argument should be output directory')
        sys.exit(1)
    # special mode for re-running jobs
    if all_args[0] == 'rerun':
        if os.path.isdir(all_args[1]):
            logfile = os.path.join(all_args[1], 'dimple.log')
        else:
            logfile = all_args[1]
        old_wf = utils.read_section_from_log(logfile, 'workflow')
        try:
            old_dir = os.path.join(old_wf['cwd'], old_wf['output_dir'])
            old_pdb = os.path.join(old_dir, 'ini.pdb')
            if not os.path.exists(old_pdb):
                if not old_wf.get('pdb_files'):
                    put_error('No pdb files in the original run?')
                    sys.exit(1)
                old_pdb = os.path.join(old_dir, old_wf['pdb_files'][0])
            if 'data_file' not in old_wf:  # temporary, to be removed soon
                old_mtz_arg = [a for a in old_wf['args'].split()
                               if a.endswith('.mtz')][0]
                old_wf['data_file'] = os.path.join(old_wf['cwd'], old_mtz_arg)
            old_mtz = os.path.join(old_dir, old_wf['data_file'])
        except (TypeError, KeyError, IndexError):
            # IndexError: the logged command line contained no .mtz argument
            put_error('Reading logfile failed', comment='is it dimple.log?')
            sys.exit(1)
        # replace 'rerun' and the log/directory with the original inputs
        all_args[0:2] = [old_pdb, old_mtz]

    mtz_args = [a for a in all_args if a.lower().endswith('.mtz')]
    if len(mtz_args) != 1:
        put_error("One mtz file should be given")
        sys.exit(1)
    opt.mtz = mtz_args[0]
    all_args.remove(opt.mtz)
    # what is left should be pdb files or PDB IDs
    opt.pdbs = all_args
    for n, a in enumerate(opt.pdbs):
        if is_pdb_id(a):
            opt.pdbs[n] = download_pdb(a, opt.output_dir)
        elif not a.lower().endswith(('.pdb', '.pdb.gz')):
            put_error("unexpected arg (neither mtz nor pdb): %s" % a)
            sys.exit(1)
    if not opt.pdbs:
        put_error("At least one pdb file should be given")
        sys.exit(1)
    if opt.seed_freerflag and opt.free_r_flags:
        put_error("Option --seed-freerflag and --free-r-flags"
                  " don't make sense together")
        sys.exit(1)
    if opt.free_r_flags == '-':
        opt.free_r_flags = opt.mtz
    # --freecolumn may have the form COL=N; split it into name and value
    opt.freecolumn_val = None
    if opt.freecolumn and '=' in opt.freecolumn:
        opt.freecolumn, opt.freecolumn_val = opt.freecolumn.rsplit('=', 1)
    if opt.freecolumn and not opt.free_r_flags:
        if opt.freecolumn == DEFAULT_FREE_COLS[0] and opt.freecolumn_val:
            pass # this may be useful for excluding different set
        else:
            put_error("--freecolumn suggests that you want to use existing free"
                      " flags.\nFor this you need also option --free-r-flags")
            sys.exit(1)

    # extra checks
    for filename in opt.pdbs + [opt.mtz, opt.free_r_flags, opt.libin]:
        if filename and not os.path.isfile(filename):
            put_error("File not found: " + filename)
            sys.exit(1)
    if os.path.exists(opt.output_dir) and not os.path.isdir(opt.output_dir):
        put_error("Not a directory: " + opt.output_dir)
        sys.exit(1)

    # Since we'll execute programs from opt.output_dir, adjust paths.
    opt.mtz = utils.adjust_path(opt.mtz, opt.output_dir)
    opt.pdbs = [utils.adjust_path(a, opt.output_dir) for a in opt.pdbs]
    if opt.free_r_flags:
        opt.free_r_flags = utils.adjust_path(opt.free_r_flags, opt.output_dir)
    if opt.libin:
        opt.libin = utils.adjust_path(opt.libin, opt.output_dir)

    # the default value of sigicolumn ('SIG<ICOL>') needs substitution
    opt.sigicolumn = opt.sigicolumn.replace('<ICOL>', opt.icolumn or 'IMEAN')

    # set defaults that depend on the 'slow' level
    # (action='count' leaves None when -s was not given; cap the level at 2)
    if opt.slow is None:
        opt.slow = 0
    elif opt.slow > 2:
        opt.slow = 2
    if opt.restr_cycles is None:
        opt.restr_cycles = [8, 10, 12][opt.slow]
    if opt.jelly is None:
        opt.jelly = [4, 10, 100][opt.slow]

    return opt
Ejemplo n.º 35
0
def parse_dimple_commands(args):
    """Parse dimple's command line and return the validated options.

    The normal invocation consists of one mtz file, one or more pdb files
    (or PDB IDs, which are downloaded into the output directory) and the
    output directory given as the last positional argument. A few special
    modes are also handled here: the ccp4i-compatible keyword form
    (HKLIN/XYZIN/HKLOUT/XYZOUT), checking of pdb files only, checking of
    a single mtz file, and 'rerun' that takes the input files from
    an existing dimple.log.

    Args:
        args: list of command-line arguments without the program name.

    Returns:
        The argparse namespace, extended with the attributes output_dir,
        mtz, pdbs, freecolumn_val and extra_ref_keys. File paths are
        adjusted to be usable from the output directory and the defaults
        of rigid_cycles, restr_cycles and jelly are filled in.

    Raises:
        SystemExit: after the special pdb/mtz modes and on any invalid
            combination of arguments (an error message is printed first).
    """
    dstr = ' (default: %(default)s)'
    parser = argparse.ArgumentParser(  # noqa: E126 visual indent
        usage=USAGE_SHORT,
        epilog=workflow.commands_help,
        prog=PROG,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # positional args can be separated by options, but not after the 3rd one
    # see http://bugs.python.org/issue15112 , http://bugs.python.org/issue14191
    parser.add_argument('pos_arg1')
    parser.add_argument('pos_arg2')
    parser.add_argument('pos_arg3')
    parser.add_argument('more_args', nargs='*')
    group1 = parser.add_argument_group('most commonly used options')
    group1.add_argument('-s',
                        '--slow',
                        action='count',
                        help='more refinement, etc. (can be used 2x)')
    group1.add_argument('-M',
                        '--mr-when-r',
                        type=float,
                        default=0.4,
                        metavar='NUM',
                        help='threshold for Molecular Replacement' + dstr)
    group2 = parser.add_argument_group('options controlling input/output')
    group2.add_argument('-I',
                        '--icolumn',
                        metavar='ICOL',
                        help='I column label (default: IMEAN)')
    group2.add_argument('--sigicolumn',
                        metavar='SIGICOL',
                        default='SIG<ICOL>',
                        help='SIGI column label' + dstr)
    group2.add_argument('--fcolumn',
                        metavar='FCOL',
                        help='F column label (default: F)')
    group2.add_argument('--sigfcolumn',
                        metavar='SIGFCOL',
                        default='SIG<FCOL>',
                        help='SIGF column label' + dstr)
    group2.add_argument('--libin',
                        metavar='CIF',
                        help='ligand descriptions for refmac (LIBIN)')
    group2.add_argument('--refmac-key',
                        metavar='LINE',
                        action='append',
                        help='extra Refmac keywords to be used in refinement')
    group2.add_argument('-R',
                        '--free-r-flags',
                        metavar='MTZ_FILE',
                        help='file with freeR flags '
                        '("-" = use flags from data mtz)')
    group2.add_argument('--freecolumn',
                        metavar='COL[=N]',
                        help='Rfree column with optional value (default: 0)')
    group2.add_argument('--hklout',
                        metavar='out.mtz',
                        default='final.mtz',
                        help='output mtz file' + dstr)
    group2.add_argument('--xyzout',
                        metavar='out.pdb',
                        default='final.pdb',
                        help='output pdb file' + dstr)
    group2.add_argument('-f',
                        choices=['png', 'jpeg', 'none'],
                        dest='img_format',
                        help='format of generated images' + dstr)
    group2.add_argument('--white-bg',
                        dest='white_bg',
                        action='store_true',
                        help='white background in Coot and in images')
    # --no-cleanup is registered first, so its implicit default (True)
    # is the default value of opt.cleanup
    group2.add_argument('--no-cleanup',
                        dest='cleanup',
                        action='store_false',
                        help='leave intermediate files')
    group2.add_argument('--cleanup',
                        action='store_true',
                        help=argparse.SUPPRESS)  # obsolete

    group_w = parser.add_argument_group('what is calculated')
    group_w.add_argument('--no-blob-search',
                         dest='blob_search',
                         action='store_false',
                         help='do not search for unmodelled blobs')
    group_w.add_argument('--anode',
                         action='store_true',
                         help='use SHELX/AnoDe to find peaks in anomalous map')

    group3 = parser.add_argument_group('options customizing the run')
    group3.add_argument('--no-hetatm',
                        action='store_true',
                        help='remove HETATM atoms from the given model')
    group3.add_argument('--rigid-cycles',
                        metavar='N',
                        type=int,
                        help='cycles of rigid-body refinement (default: 10)')
    group3.add_argument('--jelly',
                        metavar='N',
                        type=int,
                        help='cycles of jelly-body refinement (default: 4)')
    group3.add_argument('--restr-cycles',
                        metavar='N',
                        type=int,
                        help='cycles of refmac final refinement (default: 8)')
    group3.add_argument('--reso', type=float, help='limit the resolution [A]')
    group3.add_argument('--weight',
                        metavar='VALUE',
                        type=float,
                        help='refmac matrix weight (default: auto-weight)')
    group3.add_argument('--mr-prog',
                        choices=['phaser', 'molrep'],
                        default='phaser',
                        help='Molecular Replacement program' + dstr)
    group3.add_argument('--mr-num',
                        type=int,
                        help='number of molecules for MR (default: auto)')
    group3.add_argument('--mr-reso',
                        type=float,
                        default=3.25,
                        help='high resolution for MR '
                        '(if >10 interpreted as eLLG)' + dstr)
    group3.add_argument('--ItoF-prog',
                        choices=['truncate', 'ctruncate'],
                        help='program to calculate amplitudes')
    group3.add_argument('--seed-freerflag',
                        action='store_true',
                        help=argparse.SUPPRESS)
    group3.add_argument('--dls-naming',
                        action='store_true',
                        help=argparse.SUPPRESS)
    group3.add_argument('--from-step',
                        metavar='N',
                        type=int,
                        default=0,
                        help=argparse.SUPPRESS)

    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + __version__)
    # customize usage message: get rid of 'positional arguments',
    # rename default 'optional arguments' and shift it to the end.
    # pylint: disable=protected-access
    default_group = parser._action_groups[1]
    default_group.title = 'other options'
    parser._action_groups = parser._action_groups[2:] + [default_group]

    # special mode for compatibility with ccp4i
    # HKLIN/XYZIN keywords are dropped (their values become positional
    # arguments), HKLOUT/XYZOUT are translated to the corresponding options
    legacy_args = {
        'HKLIN': '',
        'XYZIN': '',
        'HKLOUT': '--hklout',
        'XYZOUT': '--xyzout'
    }
    if len(args) == 8 and args[0] in legacy_args:
        args = [
            legacy_args.get(a) or a for a in args if legacy_args.get(a) != ''
        ]
        output_dir = os.path.join(os.environ.get('CCP4_SCR', ''), 'dimple_out')
        args.append(output_dir)

    # special mode for checking pdb file[s]
    if len(args) >= 1 and all(arg.endswith('.pdb') for arg in args):
        special_pdb_mode(args)
        sys.exit(0)

    # special mode for checking mtz file
    # NOTE(review): exits with status 1 while the pdb mode above exits
    # with 0 - left unchanged, confirm that it is intended
    if len(args) == 1 and args[0].endswith('.mtz'):
        special_mtz_mode(args)
        sys.exit(1)

    opt = parser.parse_args(args)
    all_args = [opt.pos_arg1, opt.pos_arg2, opt.pos_arg3] + opt.more_args
    # all_args should be one mtz, one or more pdbs and output_dir
    opt.output_dir = all_args.pop()
    if opt.img_format == 'none':  # this option is kept for compatibility only
        opt.img_format = None
    if opt.output_dir.endswith(('.mtz', '.pdb', '.gz')):
        put_error('The last argument should be output directory')
        sys.exit(1)
    # special mode for re-running jobs
    if all_args[0] == 'rerun':
        if os.path.isdir(all_args[1]):
            logfile = os.path.join(all_args[1], 'dimple.log')
        else:
            logfile = all_args[1]
        old_wf = utils.read_section_from_log(logfile, 'workflow')
        try:
            old_dir = os.path.join(old_wf['cwd'], old_wf['output_dir'])
            old_pdb = os.path.join(old_dir, 'ini.pdb')
            if not os.path.exists(old_pdb):
                if not old_wf.get('pdb_files'):
                    put_error('No pdb files in the original run?')
                    sys.exit(1)
                old_pdb = os.path.join(old_dir, old_wf['pdb_files'][0])
            if 'data_file' not in old_wf:  # temporary, to be removed soon
                old_mtz_arg = [
                    a for a in old_wf['args'].split() if a.endswith('.mtz')
                ][0]
                old_wf['data_file'] = os.path.join(old_wf['cwd'], old_mtz_arg)
            old_mtz = os.path.join(old_dir, old_wf['data_file'])
        except (TypeError, KeyError, IndexError):
            # IndexError: the logged command line contained no .mtz argument
            put_error('Reading logfile failed', comment='is it dimple.log?')
            sys.exit(1)
        # replace 'rerun' and the log/directory with the original inputs
        all_args[0:2] = [old_pdb, old_mtz]

    mtz_args = [a for a in all_args if a.lower().endswith('.mtz')]
    if len(mtz_args) != 1:
        put_error('One mtz file should be given')
        sys.exit(1)
    opt.mtz = mtz_args[0]
    all_args.remove(opt.mtz)
    # what is left should be pdb files or PDB IDs
    opt.pdbs = all_args
    for n, a in enumerate(opt.pdbs):
        if is_pdb_id(a):
            opt.pdbs[n] = download_pdb(a, opt.output_dir)
        elif not a.lower().endswith(('.pdb', '.pdb.gz', '.ent', '.ent.gz')):
            put_error('unexpected arg (neither mtz nor pdb): %s' % a)
            sys.exit(1)
    if not opt.pdbs:
        put_error('At least one pdb file should be given')
        sys.exit(1)
    if opt.seed_freerflag and opt.free_r_flags:
        put_error('Option --seed-freerflag and --free-r-flags'
                  ' do not make sense together')
        sys.exit(1)
    if opt.free_r_flags == '-':
        opt.free_r_flags = opt.mtz
    # --freecolumn may have the form COL=N; split it into name and value
    opt.freecolumn_val = None
    if opt.freecolumn and '=' in opt.freecolumn:
        opt.freecolumn, opt.freecolumn_val = opt.freecolumn.rsplit('=', 1)
    if opt.freecolumn and not opt.free_r_flags:
        if opt.freecolumn == DEFAULT_FREE_COLS[0] and opt.freecolumn_val:
            pass  # this may be useful for excluding different set
        else:
            put_error(
                '--freecolumn suggests that you want to use existing free'
                ' flags.\nFor this you need also option --free-r-flags')
            sys.exit(1)
    # every --refmac-key line is appended to the keywords on its own line
    opt.extra_ref_keys = ''.join('\n' + key for key in opt.refmac_key or [])

    # extra checks
    for filename in opt.pdbs + [opt.mtz, opt.free_r_flags, opt.libin]:
        if filename and not os.path.isfile(filename):
            put_error('File not found: ' + filename)
            sys.exit(1)
    if os.path.exists(opt.output_dir) and not os.path.isdir(opt.output_dir):
        put_error('Not a directory: ' + opt.output_dir)
        sys.exit(1)

    # Since we'll execute programs from opt.output_dir, adjust paths.
    opt.mtz = utils.adjust_path(opt.mtz, opt.output_dir)
    opt.pdbs = [utils.adjust_path(a, opt.output_dir) for a in opt.pdbs]
    if opt.free_r_flags:
        opt.free_r_flags = utils.adjust_path(opt.free_r_flags, opt.output_dir)
    if opt.libin:
        opt.libin = utils.adjust_path(opt.libin, opt.output_dir)

    # set defaults that depend on the 'slow' level
    # (action='count' leaves None when -s was not given; cap the level at 2)
    if opt.slow is None:
        opt.slow = 0
    elif opt.slow > 2:
        opt.slow = 2
    if opt.rigid_cycles is None:
        opt.rigid_cycles = 10
    if opt.restr_cycles is None:
        opt.restr_cycles = [8, 10, 12][opt.slow]
    if opt.jelly is None:
        opt.jelly = [4, 10, 100][opt.slow]

    return opt
Ejemplo n.º 36
0
def _generate_scripts_and_pictures(wf, opt, data, pha=None):
    """Write Coot launcher scripts and, if possible, render blob pictures.

    Files written into ``wf.output_dir`` (names get the ``anom-`` prefix
    when ``pha`` is given):

    * ``run-coot.py`` - relative paths, centred on the first blob;
    * ``blobN-coot.py`` - absolute paths, for at most the first two blobs;
    * ``coot.sh`` - one-line shell wrapper.

    Args:
        wf: workflow object; ``output_dir``, ``coot_py``, ``render_r3d``
            and ``delete_files`` are used here.
        opt: options; ``img_format`` is reset to None in place when the
            tools needed for pictures are missing.
        data: mapping with a ``'blobs'`` entry and optionally ``'center'``,
            or a false value when there is nothing to show.
        pha: optional reflection file used instead of ``opt.hklout``;
            when given, ``normal_map=False`` is passed to the script
            generator.

    Returns:
        Path of the generated ``coot.sh`` script.

    Raises:
        workflow.JobError: re-raised when the coot job that prepares the
            rendering input fails.
    """
    blobs = data['blobs'] if data else []
    coot_path = coots.find_path()

    if not blobs:
        comment('\nUnmodelled blobs not found.')
    elif opt.img_format:
        # Pictures need a coot that reports Python support and the
        # 'render' program; any missing piece disables pictures.
        coot_problem = None
        if not coot_path:
            coot_problem = 'No coot, no pictures'
        else:
            coot_ver = coots.find_version(coot_path)
            if coot_ver is None:
                coot_problem = 'coot not working(?), no pictures'
            elif 'with python' not in coot_ver:
                coot_problem = 'coot with Python support is needed'
        if coot_problem is not None:
            put_error(coot_problem)
            opt.img_format = None
        if not utils.syspath('render'):
            put_error('No Raster3d, no pictures')
            opt.img_format = None
        if opt.img_format:
            if len(blobs) == 1:
                announcement = (
                    '\nRendering density blob at (%.1f, %.1f, %.1f)' %
                    blobs[0])
            else:
                announcement = (
                    '\nRendering 2 largest blobs: at (%.1f, %.1f, %.1f) '
                    'and at (%.1f, %.1f, %.1f)' % (blobs[0] + blobs[1]))
            comment(announcement)

    toward = data and data.get('center')
    if pha:
        normal_map, refl, prefix = False, pha, 'anom-'
    else:
        normal_map, refl, prefix = True, opt.hklout, ''

    def make_script(pdb_path, refl_path, center):
        # all scripts share the map type, view direction and background
        return coots.basic_script(pdb=pdb_path,
                                  refl=refl_path,
                                  normal_map=normal_map,
                                  center=center,
                                  toward=toward,
                                  white_bg=opt.white_bg)

    # run-coot.py uses relative paths: it must be started from the output
    # directory, but keeps working after that directory is moved.
    _write_script(os.path.join(wf.output_dir, prefix + 'run-coot.py'),
                  make_script(opt.xyzout, refl, blobs and blobs[0]),
                  executable=True)

    # blobN-coot.py scripts use absolute paths; no more than two blobs
    abs_out = os.path.abspath(wf.output_dir)
    for number, blob in enumerate(blobs[:2], 1):
        py_path = os.path.join(wf.output_dir,
                               '%sblob%d-coot.py' % (prefix, number))
        _write_script(py_path,
                      make_script(os.path.join(abs_out, opt.xyzout),
                                  os.path.join(abs_out, refl),
                                  blob))

    # coot.sh - convenience one-liner
    if blobs:
        coot_sh_text = '{coot} --no-guano {out}/%sblob1-coot.py\n' % prefix
    else:
        coot_sh_text = '{coot} --no-guano {out}/final.mtz {out}/final.pdb\n'
    coot_sh_path = os.path.join(wf.output_dir, prefix + 'coot.sh')
    _write_script(coot_sh_path,
                  coot_sh_text.format(coot=coot_path or 'coot',
                                      out=wf.output_dir),
                  executable=True)

    if not (opt.img_format and blobs):
        return coot_sh_path

    # Maps are reloaded for each blob as a workaround for buggy coot.
    pieces = []
    basenames = []
    for number, blob in enumerate(blobs[:2], 1):
        pieces.append(make_script(opt.xyzout, refl, blob))
        rs, names = coots.r3d_script(center=blob,
                                     toward=toward,
                                     blobname='%sblob%s' % (prefix, number))
        pieces.append(rs)
        basenames.extend(names)
    coot_job = wf.coot_py(''.join(pieces))
    try:
        coot_job.run()
    except workflow.JobError:
        # Re-run silently to give the user a hint about a likely cause
        # (possible workaround: change $HOME to non-existing directory).
        if utils.silently_run(coot_job.args, cwd=wf.output_dir)[0] != 0:
            put_error('coot fails with options: --no-graphics --python',
                      comment='It happens when scripts in .coot or '
                      '.coot-preferences are not compatible\n'
                      'with the --no-graphics mode.')
        raise

    for index, basename in enumerate(basenames):
        # only every third rendering job prints full progress output
        quiet = (index % 3 != 0)
        try:
            job = wf.render_r3d(basename, img_format=opt.img_format)
            if quiet:
                job.run(show_progress=False, new_line=False)
            else:
                job.run()
            wf.delete_files([basename + '.r3d'])
        except workflow.JobError as e:
            # Raster3D may fail saying "increase MAXDET and recompile".
            # This is not critical, so Dimple doesn't stop.
            put_error('Rendering failed, no picture', comment=' ' + e.note)
    return coot_sh_path
Ejemplo n.º 37
0
def dimple(wf, opt):
    """Run the whole pipeline for one reflection file and its models.

    The steps run strictly in this order, each one consuming files and
    job results produced by the previous ones: read metadata and pick
    the model, optionally strip HETATM, rwcontents, pointless
    (reindexing), (c)truncate (amplitudes), rigid-body refmac5,
    phaser/molrep when needed, free-R flag preparation, restrained
    refmac5 (optional jelly-body run followed by the final run), blob
    search with Coot scripts, and optionally AnoDe.

    Args:
        wf: workflow object. Its job factories (``wf.refmac5`` etc.),
            ``wf.jobs``, ``wf.file_info``, ``wf.temporary_files``,
            ``wf.from_job`` and ``wf.repl_jobs`` are used here.
        opt: parsed options. ``opt.pdbs``, ``opt.fcolumn`` and
            ``opt.sigfcolumn`` are modified in place.

    Returns:
        None. Returns early when no usable PDB file is left, when MR
        would be needed but ``opt.mr_when_r >= 1``, or when the columns
        needed for AnoDe are not found.

    Raises:
        RuntimeError: when ``_after_phaser_comments`` reports no phaser
            solution.
    """
    comment('     ### Dimple v%s. Problems and suggestions:'
            ' ccp4.github.io/dimple ###' % __version__)
    mtz_meta = wf.read_mtz_metadata(opt.mtz)
    _comment_summary_line('MTZ (%.1fA)' % mtz_meta.dmax, mtz_meta)
    if opt.dls_naming:
        opt.pdbs = dls_name_filter(opt.pdbs)
    opt.pdbs = utils.filter_out_duplicate_files(opt.pdbs, relto=opt.output_dir)
    if not opt.pdbs:
        comment('\nNo non-empty pdb files given. Nothing to do.')
        return
    for p in opt.pdbs:
        wf.read_pdb_metadata(p, print_errors=(len(opt.pdbs) > 1))
    if len(opt.pdbs) > 1:
        # sort the models so that the one closest to the data comes first
        comment('\nPDBs in order of similarity (using the first one):')
        opt.pdbs.sort(
            key=lambda x: calculate_difference(wf.file_info[x], mtz_meta))
    utils.log_value('data_file', opt.mtz)
    utils.log_value('pdb_files', opt.pdbs)
    for p in opt.pdbs:
        _comment_summary_line(os.path.basename(p), wf.file_info[p])
    # only the first model is used from here on
    ini_pdb = 'ini.pdb'
    wf.copy_uncompressed(opt.pdbs[0], ini_pdb)
    pdb_meta = wf.file_info[opt.pdbs[0]]
    if pdb_meta is None:
        put_error('PDB file missing CRYST1 record, starting from MR')
    if opt.no_hetatm or check_hetatm_x(wf.path(ini_pdb), pdb_meta):
        if not opt.no_hetatm:
            comment('\nHETATM marked as element X would choke many programs.')
        rb_xyzin = 'prepared.pdb'
        wf.temporary_files.add(rb_xyzin)
        n_het = wf.remove_hetatm(xyzin=ini_pdb,
                                 xyzout=rb_xyzin,
                                 remove_all=opt.no_hetatm)
        comment('\nRemoved %d HETATM atoms' % n_het)
    else:
        rb_xyzin = ini_pdb
    # run rwcontents even without CRYST1 - it will show mol. weight only
    wf.rwcontents(xyzin=rb_xyzin).run()
    # rw_data is reused later (waters/aa, num_mol, weight)
    rw_data = wf.jobs[-1].data
    if pdb_meta is None:
        pass  # we already had a warning message
    elif rw_data.get('solvent_percent') is None:
        put_error('rwcontents could not interpret %s' % rb_xyzin)
    elif rw_data['solvent_percent'] > HIGH_SOLVENT_PCT:
        comment('\nHmm... %.1f%% of solvent or incomplete model' %
                rw_data['solvent_percent'])
        if abs(wf.jobs[-1].data.get('volume', 0) - pdb_meta.get_volume()) > 10:
            comment('\ndebug: problem when calculating volume?')

    ####### pointless - reindexing #######
    # attempted only when symmetry matches, MR is not forced and the
    # unit cell volumes are within a factor of 0.7-1.4 of each other
    if match_symmetry(mtz_meta, pdb_meta) and opt.mr_when_r > 0 and (
            0.7 < mtz_meta.get_volume() / pdb_meta.get_volume() < 1.4):
        reindexed_mtz = 'pointless.mtz'
        wf.temporary_files.add(reindexed_mtz)
        wf.pointless(hklin=opt.mtz,
                     xyzin=rb_xyzin,
                     hklout=reindexed_mtz,
                     keys='TOLERANCE 5').run(may_fail=True)
        alt_reindex = wf.jobs[-1].data.get('alt_reindex')
        if wf.jobs[-1].exit_status == 0 and alt_reindex:
            for ar in alt_reindex:
                comment('\n    %-10s CC: %-8.3f cell diff: %.1fA' %
                        (ar['op'], ar['cc'], ar['cell_deviat']))
        else:
            # until recently (2015) pointless didn't print CC for non-ambiguous
            # spacegroups (e.g. C2), but now it always prints
            comment('\n    no good indexing')
            # fall back to the original data file
            reindexed_mtz = opt.mtz
    else:
        reindexed_mtz = opt.mtz
    reindexed_mtz_meta = wf.read_mtz_metadata(reindexed_mtz)
    if reindexed_mtz_meta.symmetry != mtz_meta.symmetry:
        _comment_summary_line('reindexed MTZ', reindexed_mtz_meta)

    ####### (c)truncate - calculate amplitudes if needed #######
    if not opt.fcolumn:
        opt.fcolumn = 'F' if 'F' in mtz_meta.columns else 'FP'
    elif opt.icolumn or opt.ItoF_prog:
        put_error('Ignoring options --fcolumn/--sigfcolumn')
    opt.sigfcolumn = opt.sigfcolumn.replace('<FCOL>', opt.fcolumn)
    # amplitudes are calculated when explicitly requested or when the
    # F/SIGF columns are absent from the input file
    if (opt.ItoF_prog or opt.icolumn or opt.fcolumn not in mtz_meta.columns
            or opt.sigfcolumn not in mtz_meta.columns):
        f_mtz = 'amplit.mtz'
        wf.temporary_files.add(f_mtz)
        i_sigi_cols = _find_i_sigi_columns(mtz_meta, opt)
        # ctruncate is used on request, or by default at non-zero 'slow'
        if opt.ItoF_prog == 'ctruncate' or (opt.ItoF_prog is None
                                            and opt.slow):
            colano = None
            if opt.anode and all(
                    col in mtz_meta.columns
                    for col in ['I(+)', 'SIGI(+)', 'I(-)', 'SIGI(-)']):
                colano = '/*/*/[I(+),SIGI(+),I(-),SIGI(-)]'
            wf.ctruncate(hklin=reindexed_mtz,
                         hklout=f_mtz,
                         colin='/*/*/[%s,%s]' % i_sigi_cols,
                         colano=colano).run()
        else:
            wf.truncate(hklin=reindexed_mtz,
                        hklout=f_mtz,
                        labin='IMEAN=%s SIGIMEAN=%s' % i_sigi_cols,
                        labout='F=F SIGF=SIGF').run()
        # the amplitude file written above is then read with these labels
        opt.fcolumn = 'F'
        opt.sigfcolumn = 'SIGF'
    else:
        f_mtz = reindexed_mtz

    ####### rigid body - check if model is good for refinement? #######
    refmac_labin_nofree = 'FP=%s SIGFP=%s' % (opt.fcolumn, opt.sigfcolumn)
    # refmac_xyzin stays None when MR is needed (see the next section)
    refmac_xyzin = None
    cell_diff = calculate_difference(pdb_meta, reindexed_mtz_meta)
    if pdb_meta is None:
        pass  # the error message was already printed
    elif opt.mr_when_r <= 0:
        comment('\nMR requested unconditionally.')
    elif cell_diff > 0.1 and opt.mr_when_r < 1:
        comment('\nDifferent unit cells.')
    elif pdb_meta.symmetry != reindexed_mtz_meta.symmetry:
        comment('\nDifferent space groups.')
    else:
        comment('\nRigid-body refinement with resolution 3.5 A, %d cycles.' %
                opt.rigid_cycles)
        if 'aa_count' in rw_data and 'water_count' in rw_data:
            # avoid division by zero for models without amino acids
            if rw_data['aa_count'] != 0:
                comment(' %.1f waters/aa.' %
                        (rw_data['water_count'] / rw_data['aa_count']))
            else:
                comment(' %d/0 waters/aa.' % rw_data['water_count'])
        wf.temporary_files |= {'refmacRB.pdb', 'refmacRB.mtz'}
        # it may fail because of "Disagreement between mtz and pdb"
        wf.refmac5(hklin=f_mtz,
                   xyzin=rb_xyzin,
                   hklout='refmacRB.mtz',
                   xyzout='refmacRB.pdb',
                   labin=refmac_labin_nofree,
                   libin=None,
                   keys="""refinement type rigidbody resolution 15 3.5
                           rigidbody ncycle %d""" %
                   opt.rigid_cycles).run(may_fail=True)
        # if the error is caused by mtz/pdb disagreement, continue with MR
        if wf.jobs[-1].exit_status != 0:
            comment('\nTry MR.')
        elif not wf.jobs[-1].data.get('overall_r'):
            comment('\nWARNING: unknown R factor, something went wrong.\n')
            refmac_xyzin = 'refmacRB.pdb'
        elif wf.jobs[-1].data['overall_r'] > opt.mr_when_r:
            comment('\nRun MR for R > %g.' % opt.mr_when_r)
        else:
            comment('\nNo MR for R < %g.' % opt.mr_when_r)
            refmac_xyzin = 'refmacRB.pdb'

    ####### phaser/molrep - molecular replacement #######
    if refmac_xyzin is None:
        vol_ratio = None
        if pdb_meta:
            # num_mol accounts for strict NCS (MTRIX without iGiven)
            vol_ratio = (mtz_meta.asu_volume() /
                         pdb_meta.asu_volume(rw_data['num_mol']))
            comment(' Volume of asu: %.1f%% of model asu.' % (100 * vol_ratio))
        if opt.mr_when_r >= 1:
            comment('\nWould try MR, but it is disabled.')
            return
        if opt.mr_num:
            mr_num = opt.mr_num
        else:
            mr_num = guess_number_of_molecules(mtz_meta, rw_data, vol_ratio)
        mw = rw_data.get('weight')
        # a float mr_num is scaled by the number of models found by
        # ensembler; the search then uses the merged ensemble file
        if isinstance(mr_num, float):
            wf.ensembler(pdbin=rb_xyzin, root='ens').run()
            n_models = len(wf.jobs[-1].data['models'])
            mw = None
            rb_xyzin = 'ens_merged.pdb'
            mr_num = max(int(round(mr_num * n_models)), 1)
        # phaser is used by default if number of searched molecules is known
        if opt.mr_prog == 'molrep':
            wf.temporary_files |= {
                'molrep.pdb', 'molrep_dimer.pdb', 'molrep.crd'
            }
            wf.molrep(f=f_mtz, m=rb_xyzin).run()
            refmac_xyzin = 'molrep.pdb'
        else:
            wf.temporary_files |= {'phaser.1.pdb', 'phaser.1.mtz'}
            wf.phaser_auto(hklin=f_mtz,
                           labin='F=%s SIGF=%s' %
                           (opt.fcolumn, opt.sigfcolumn),
                           model=dict(pdb=rb_xyzin,
                                      identity=100,
                                      num=mr_num,
                                      mw=mw),
                           sg_alt='ALL',
                           opt=opt,
                           root='phaser').run(may_fail=True)
            if not _after_phaser_comments(wf.jobs[-1],
                                          sg_in=reindexed_mtz_meta.symmetry):
                raise RuntimeError('No phaser solution.')
            # continue with both the model and the data written by phaser
            refmac_xyzin = 'phaser.1.pdb'
            f_mtz = 'phaser.1.mtz'

    # disabled code path, never executed
    if False:
        wf.findwaters(pdbin=refmac_xyzin,
                      hklin=f_mtz,
                      f='FC',
                      phi='PHIC',
                      pdbout='prepared_wat.pdb',
                      sigma=2)
        refmac_xyzin = 'prepared_wat.pdb'

    ####### adding free-R flags #######
    f_mtz_meta = wf.read_mtz_metadata(f_mtz)
    cad_reso = opt.reso or (f_mtz_meta.dmax - MtzMeta.d_eps)
    if opt.free_r_flags:
        # take the flags from the user-specified file
        free_mtz = opt.free_r_flags
        free_col = check_freerflags_column(wf.path(free_mtz),
                                           expected_symmetry=pdb_meta,
                                           column=opt.freecolumn)
        comment('\nFree-R flags from the %s file, column %s.' %
                (('reference' if free_mtz != opt.mtz else 'input'), free_col))
    else:
        # generate a new set of flags (any existing column is replaced)
        free_col = DEFAULT_FREE_COLS[0]
        if free_col in f_mtz_meta.columns:
            comment('\nReplace free-R flags')
        else:
            comment('\nGenerate free-R flags')
        free_mtz = 'free.mtz'
        wf.temporary_files |= {'unique.mtz', free_mtz}
        if opt.seed_freerflag or cell_diff > 1e3:  # i.e. different SG
            wf.unique(hklout='unique.mtz', ref=f_mtz_meta,
                      resolution=cad_reso).run()
        else:
            comment(' (repeatably)')
            # Here we'd like to have always the same set of free-r flags
            # for given PDB file. That's why we don't use information
            # from the data file (mtz).
            wf.unique(hklout='unique.mtz', ref=pdb_meta, resolution=1.0).run()
        # CCP4 freerflag uses always the same pseudo-random sequence by default
        wf.freerflag(hklin='unique.mtz',
                     hklout=free_mtz,
                     keys=('SEED' if opt.seed_freerflag else '')).run()

    # cad is skipped only when the flags come from the input data file
    # itself and no resolution cut-off was requested
    if free_mtz == opt.mtz and opt.reso is None:
        prepared_mtz = f_mtz
    else:
        prepared_mtz = 'prepared.mtz'
        wf.temporary_files.add(prepared_mtz)
        wf.cad(
            data_in=[(f_mtz, [c for c in f_mtz_meta.columns if c != free_col]),
                     (free_mtz, [free_col])],
            hklout=prepared_mtz,
            keys=[
                'sysab_keep',  # does it matter?
                'reso overall 1000.0 %g' % cad_reso
            ]).run()
    # complete the flags for reflections that still have none
    freerflag_missing = wf.count_mtz_missing(prepared_mtz, free_col)
    if freerflag_missing:
        wf.freerflag(hklin=prepared_mtz,
                     hklout='prepared2.mtz',
                     keys='COMPLETE FREE=' + free_col,
                     parser=' (again, for %d refl. more)' %
                     freerflag_missing).run()
        prepared_mtz = 'prepared2.mtz'
        wf.temporary_files.add(prepared_mtz)

    ####### refinement #######
    if opt.weight:
        refmac_weight = 'matrix %f' % opt.weight
    else:
        refmac_weight = 'auto'
    restr_ref_keys = """\
     make newligand continue
     refinement type restrained
     weight %s
     """ % refmac_weight
    if opt.freecolumn_val:
        restr_ref_keys += 'free %s\n' % opt.freecolumn_val
    refmac_labin = '%s FREE=%s' % (refmac_labin_nofree, free_col)
    comment('\nRestrained refinement, %d+%d cycles.' %
            (opt.jelly, opt.restr_cycles))
    # optional first run with the 'ridge distance sigma' keyword; its
    # output model is the input of the final run
    if opt.jelly:
        wf.temporary_files |= {'jelly.pdb', 'jelly.mtz'}
        wf.refmac5(hklin=prepared_mtz,
                   xyzin=refmac_xyzin,
                   hklout='jelly.mtz',
                   xyzout='jelly.pdb',
                   labin=refmac_labin,
                   libin=opt.libin,
                   keys=restr_ref_keys + 'ridge distance sigma 0.01\n'
                   'make hydrogen no\n'
                   'ncycle %d' % opt.jelly + opt.extra_ref_keys).run()
        comment(_refmac_rms_line(wf.jobs[-1].data))
        refmac_xyzin = 'jelly.pdb'
    # final restrained refinement writing opt.hklout / opt.xyzout
    restr_job = wf.refmac5(
        hklin=prepared_mtz,
        xyzin=refmac_xyzin,
        hklout=opt.hklout,
        xyzout=opt.xyzout,
        labin=refmac_labin,
        libin=opt.libin,
        keys=(restr_ref_keys + 'ncycle %d' % opt.restr_cycles +
              opt.extra_ref_keys)).run()
    comment(_refmac_rms_line(restr_job.data))
    # if that run is repeated with --from-step it's useful to compare Rfree
    if wf.from_job > 0 and wf.from_job <= len(wf.jobs):  # from_job is 1-based
        prev = [j for j in wf.repl_jobs if j.name == restr_job.name]
        if prev and prev[0].data and 'free_r' in prev[0].data:
            comment('\nPreviously:  R/Rfree %.4f/%.4f  Rfree change: %+.4f' %
                    (prev[0].data['overall_r'], prev[0].data['free_r'],
                     restr_job.data['free_r'] - prev[0].data['free_r']))

    ####### check blobs #######
    if opt.blob_search:
        if restr_job.data['free_r'] <= BAD_FINAL_RFREE:
            fb_job = wf.find_blobs(opt.hklout, opt.xyzout, sigma=0.8).run()
            coot_script = _generate_scripts_and_pictures(wf, opt, fb_job.data)
            if coot_script:
                comment('\nTo see it in Coot run %s' % coot_script)
        else:
            comment('\nNo blob search for Rfree > %g.' % BAD_FINAL_RFREE)
            # scripts are still written, just without blobs
            _generate_scripts_and_pictures(wf, opt, None)

    if opt.anode:
        # check if mtz contains I+/- and SIGI+/-
        column_types = list(reindexed_mtz_meta.columns.values())
        # NOTE(review): with 'and' the step is skipped only when BOTH
        # counts differ from 2 - confirm that 'or' was not intended.
        if column_types.count('K') != 2 and column_types.count('M') != 2:
            comment('\nColumns I+/- and SIG+/- not found. Skipping AnoDe.')
            return
        anode_name = 'anode'
        # convert to sca for input to shelxc
        scaout = anode_name + '.sca'
        wf.mtz2sca(prepared_mtz, scaout).run()

        wf.shelxc(scaout, reindexed_mtz_meta.cell,
                  reindexed_mtz_meta.symmetry).run()

        wf.copy_uncompressed(opt.xyzout, anode_name + '.pdb')
        anode_job = wf.anode(anode_name).run()
        wf.temporary_files |= {
            scaout, anode_name + '.pdb', anode_name + '.hkl',
            anode_name + '.pha', anode_name + '_sad.cif',
            anode_name + '_fa.hkl'
        }
        cell = Cell(reindexed_mtz_meta.cell, reindexed_mtz_meta.symmetry)
        # need orthogonal not fractional coordinates to generate coot script
        anode_job.data['blobs'] = cell.orthogonalize(anode_job.data['xyz'])
        comment(_anode_anom_peak_lines(anode_job.data))
        # the returned script path is not used here
        coot_script = _generate_scripts_and_pictures(wf,
                                                     opt,
                                                     anode_job.data,
                                                     pha=anode_name + '.pha')
Ejemplo n.º 38
0
Archivo: main.py Proyecto: ccp4/dimple
def dimple(wf, opt):
    """Run the whole pipeline for one reflection file and its models.

    The steps run strictly in this order, each one consuming files and
    job results produced by the previous ones: read metadata and pick
    the model, optionally strip HETATM, rwcontents, pointless
    (reindexing), (c)truncate (amplitudes), rigid-body refmac5,
    phaser/molrep when needed, free-R flag preparation, restrained
    refmac5 (optional jelly-body run followed by the final run), and
    blob search with Coot scripts.

    Args:
        wf: workflow object. Its job factories (``wf.refmac5`` etc.),
            ``wf.jobs``, ``wf.file_info``, ``wf.temporary_files``,
            ``wf.from_job`` and ``wf.repl_jobs`` are used here.
        opt: parsed options. ``opt.pdbs``, ``opt.fcolumn`` and
            ``opt.sigfcolumn`` are modified in place.

    Returns:
        None. Returns early when no usable PDB file is left, when MR
        would be needed but ``opt.mr_when_r >= 1``, or when
        ``_after_phaser_comments`` reports no phaser solution.
    """
    comment("     ### Dimple v%s. Problems and suggestions:"
            " ccp4.github.io/dimple ###" % __version__)
    mtz_meta = wf.read_mtz_metadata(opt.mtz)
    _comment_summary_line("MTZ (%.1fA)" % mtz_meta.dmax, mtz_meta)
    if opt.dls_naming:
        opt.pdbs = dls_name_filter(opt.pdbs)
    opt.pdbs = utils.filter_out_duplicate_files(opt.pdbs, relto=opt.output_dir)
    if not opt.pdbs:
        comment("\nNo non-empty pdb files given. Nothing to do.")
        return
    for p in opt.pdbs:
        wf.read_pdb_metadata(p, print_errors=(len(opt.pdbs) > 1))
    if len(opt.pdbs) > 1:
        # sort the models so that the one closest to the data comes first
        comment("\nPDBs in order of similarity (using the first one):")
        opt.pdbs.sort(key=lambda x: calculate_difference(wf.file_info[x],
                                                         mtz_meta))
    utils.log_value("data_file", opt.mtz)
    utils.log_value("pdb_files", opt.pdbs)
    for p in opt.pdbs:
        _comment_summary_line(os.path.basename(p), wf.file_info[p])
    # only the first model is used from here on
    ini_pdb = "ini.pdb"
    wf.copy_uncompressed(opt.pdbs[0], ini_pdb)
    pdb_meta = wf.file_info[opt.pdbs[0]]
    if pdb_meta is None:
        put_error("PDB file missing CRYST1 record, starting from MR")
    if opt.no_hetatm or check_hetatm_x(wf.path(ini_pdb), pdb_meta):
        if not opt.no_hetatm:
            comment("\nHETATM marked as element X would choke many programs.")
        rb_xyzin = "prepared.pdb"
        wf.temporary_files.add(rb_xyzin)
        n_het = wf.remove_hetatm(xyzin=ini_pdb, xyzout=rb_xyzin,
                                 remove_all=opt.no_hetatm)
        comment("\nRemoved %d HETATM atoms" % n_het)
    else:
        rb_xyzin = ini_pdb
    # run rwcontents even without CRYST1 - it will show mol. weight only
    wf.rwcontents(xyzin=rb_xyzin).run()
    # rw_data is reused later (waters/aa, num_mol, weight)
    rw_data = wf.jobs[-1].data
    if pdb_meta is None:
        pass # we already had a warning message
    elif rw_data.get('solvent_percent') is None:
        put_error("rwcontents could not interpret %s" % rb_xyzin)
    elif rw_data['solvent_percent'] > HIGH_SOLVENT_PCT:
        comment("\nHmm... %.1f%% of solvent or incomplete model" %
                rw_data['solvent_percent'])
        if abs(wf.jobs[-1].data.get('volume', 0) - pdb_meta.get_volume()) > 10:
            comment("\ndebug: problem when calculating volume?")

    ####### pointless - reindexing #######
    # attempted only when symmetry matches, MR is not forced and the
    # unit cell volumes are within a factor of 0.7-1.4 of each other
    if match_symmetry(mtz_meta, pdb_meta) and opt.mr_when_r > 0 and (
            0.7 < mtz_meta.get_volume() / pdb_meta.get_volume() < 1.4):
        reindexed_mtz = "pointless.mtz"
        wf.temporary_files.add(reindexed_mtz)
        wf.pointless(hklin=opt.mtz, xyzin=rb_xyzin, hklout=reindexed_mtz,
                     keys="TOLERANCE 5").run(may_fail=True)
        alt_reindex = wf.jobs[-1].data.get('alt_reindex')
        if wf.jobs[-1].exit_status == 0 and alt_reindex:
            for ar in alt_reindex:
                comment("\n    %-10s CC: %-8.3f cell diff: %.1fA" % (
                        ar['op'], ar['cc'], ar['cell_deviat']))
        else:
            # until recently (2015) pointless didn't print CC for non-ambiguous
            # spacegroups (e.g. C2), but now it always prints
            comment("\n    no good indexing")
            # fall back to the original data file
            reindexed_mtz = opt.mtz
    else:
        reindexed_mtz = opt.mtz
    reindexed_mtz_meta = wf.read_mtz_metadata(reindexed_mtz)
    if reindexed_mtz_meta.symmetry != mtz_meta.symmetry:
        _comment_summary_line('reindexed MTZ', reindexed_mtz_meta)

    ####### (c)truncate - calculate amplitudes if needed #######
    if not opt.fcolumn:
        opt.fcolumn = 'F' if 'F' in mtz_meta.columns else 'FP'
    elif opt.icolumn or opt.ItoF_prog:
        put_error('Ignoring options --fcolumn/--sigfcolumn')
    opt.sigfcolumn = opt.sigfcolumn.replace('<FCOL>', opt.fcolumn)
    # amplitudes are calculated when explicitly requested or when the
    # F/SIGF columns are absent from the input file
    if (opt.ItoF_prog or opt.icolumn or opt.fcolumn not in mtz_meta.columns
                                  or opt.sigfcolumn not in mtz_meta.columns):
        mtz_meta.check_col_type(opt.icolumn or 'IMEAN', 'J')
        mtz_meta.check_col_type(opt.sigicolumn, 'Q')
        f_mtz = "amplit.mtz"
        wf.temporary_files.add(f_mtz)
        i_sigi_cols = (opt.icolumn or 'IMEAN', opt.sigicolumn)
        # ctruncate is used on request, or by default at non-zero 'slow'
        if opt.ItoF_prog == 'ctruncate' or (opt.ItoF_prog is None and opt.slow):
            wf.ctruncate(hklin=reindexed_mtz, hklout=f_mtz,
                         colin="/*/*/[%s,%s]" % i_sigi_cols).run()
        else:
            wf.truncate(hklin=reindexed_mtz, hklout=f_mtz,
                        labin="IMEAN=%s SIGIMEAN=%s" % i_sigi_cols,
                        labout="F=F SIGF=SIGF").run()
        # the amplitude file written above is then read with these labels
        opt.fcolumn = 'F'
        opt.sigfcolumn = 'SIGF'
    else:
        f_mtz = reindexed_mtz

    ####### rigid body - check if model is good for refinement? #######
    refmac_labin_nofree = "FP=%s SIGFP=%s" % (opt.fcolumn, opt.sigfcolumn)
    # refmac_xyzin stays None when MR is needed (see the next section)
    refmac_xyzin = None
    cell_diff = calculate_difference(pdb_meta, reindexed_mtz_meta)
    if pdb_meta is None:
        pass # the error message was already printed
    elif opt.mr_when_r <= 0:
        comment("\nMR requested unconditionally.")
    elif cell_diff > 0.1 and opt.mr_when_r < 1:
        comment("\nDifferent unit cells.")
    elif pdb_meta.symmetry != reindexed_mtz_meta.symmetry:
        comment("\nDifferent space groups.")
    else:
        comment("\nRigid-body refinement with resolution 3.5 A, 10 cycles.")
        if 'aa_count' in rw_data and 'water_count' in rw_data:
            # A model without amino-acid residues would otherwise raise
            # ZeroDivisionError here and abort the whole pipeline.
            if rw_data['aa_count'] != 0:
                comment(" %.1f waters/aa." % (rw_data['water_count'] /
                                            rw_data['aa_count']))
            else:
                comment(" %d/0 waters/aa." % rw_data['water_count'])
        wf.temporary_files |= {"refmacRB.pdb", "refmacRB.mtz"}
        # it may fail because of "Disagreement between mtz and pdb"
        wf.refmac5(hklin=f_mtz, xyzin=rb_xyzin,
                   hklout="refmacRB.mtz", xyzout="refmacRB.pdb",
                   labin=refmac_labin_nofree,
                   libin=None,
                   keys="""refinement type rigidbody resolution 15 3.5
                           rigidbody ncycle 10""").run(may_fail=True)
        # if the error is caused by mtz/pdb disagreement, continue with MR
        if wf.jobs[-1].exit_status != 0:
            comment("\nTry MR.")
        elif not wf.jobs[-1].data.get("overall_r"):
            comment("\nWARNING: unknown R factor, something went wrong.\n")
            refmac_xyzin = "refmacRB.pdb"
        elif wf.jobs[-1].data["overall_r"] > opt.mr_when_r:
            comment("\nRun MR for R > %g." % opt.mr_when_r)
        else:
            comment("\nNo MR for R < %g." % opt.mr_when_r)
            refmac_xyzin = "refmacRB.pdb"

    ####### phaser/molrep - molecular replacement #######
    if refmac_xyzin is None:
        vol_ratio = None
        if pdb_meta:
            # num_mol accounts for strict NCS (MTRIX without iGiven)
            vol_ratio = (mtz_meta.asu_volume() /
                         pdb_meta.asu_volume(rw_data['num_mol']))
            comment(" Volume of asu: %.1f%% of model asu." % (100 * vol_ratio))
        if opt.mr_when_r >= 1:
            comment("\nWould try MR, but it is disabled.")
            return
        if opt.mr_num:
            mr_num = opt.mr_num
        else:
            mr_num = guess_number_of_molecules(mtz_meta, rw_data, vol_ratio)
        mw = rw_data.get('weight')
        # a float mr_num is scaled by the number of models found by
        # ensembler; the search then uses the merged ensemble file
        if isinstance(mr_num, float):
            wf.ensembler(pdbin=rb_xyzin, root='ens').run()
            n_models = len(wf.jobs[-1].data['models'])
            mw = None
            rb_xyzin = "ens_merged.pdb"
            mr_num = max(int(round(mr_num * n_models)), 1)
        # phaser is used by default if number of searched molecules is known
        if opt.mr_prog == 'molrep':
            wf.temporary_files |= {"molrep.pdb", "molrep_dimer.pdb",
                                   "molrep.crd"}
            wf.molrep(f=f_mtz, m=rb_xyzin).run()
            refmac_xyzin = "molrep.pdb"
        else:
            wf.temporary_files |= {"phaser.1.pdb", "phaser.1.mtz"}
            wf.phaser_auto(hklin=f_mtz,
                           labin="F=%s SIGF=%s" % (opt.fcolumn, opt.sigfcolumn),
                           model=dict(pdb=rb_xyzin, identity=100, num=mr_num,
                                      mw=mw),
                           sg_alt="ALL", opt=opt,
                           root='phaser').run(may_fail=True)
            # without a phaser solution there is nothing to refine
            if not _after_phaser_comments(wf.jobs[-1],
                                          sg_in=reindexed_mtz_meta.symmetry):
                return
            # continue with both the model and the data written by phaser
            refmac_xyzin = "phaser.1.pdb"
            f_mtz = "phaser.1.mtz"

    # disabled code path, never executed
    if False:
        wf.findwaters(pdbin=refmac_xyzin, hklin=f_mtz,
                      f="FC", phi="PHIC", pdbout="prepared_wat.pdb", sigma=2)
        refmac_xyzin = "prepared_wat.pdb"

    ####### adding free-R flags #######
    f_mtz_meta = wf.read_mtz_metadata(f_mtz)
    cad_reso = opt.reso or (f_mtz_meta.dmax - MtzMeta.d_eps)
    if opt.free_r_flags:
        # take the flags from the user-specified file
        free_mtz = opt.free_r_flags
        free_col = check_freerflags_column(wf.path(free_mtz),
                                           expected_symmetry=pdb_meta,
                                           column=opt.freecolumn)
        comment("\nFree-R flags from the %s file, column %s." %
                (("reference" if free_mtz != opt.mtz else 'input'), free_col))
    else:
        # generate a new set of flags (any existing column is replaced)
        free_col = DEFAULT_FREE_COLS[0]
        if free_col in f_mtz_meta.columns:
            comment("\nReplace free-R flags")
        else:
            comment("\nGenerate free-R flags")
        free_mtz = "free.mtz"
        wf.temporary_files |= {"unique.mtz", free_mtz}
        if opt.seed_freerflag or cell_diff > 1e3: # i.e. different SG
            wf.unique(hklout="unique.mtz", ref=f_mtz_meta,
                      resolution=cad_reso).run()
        else:
            comment(" (repeatably)")
            # Here we'd like to have always the same set of free-r flags
            # for given PDB file. That's why we don't use information
            # from the data file (mtz).
            wf.unique(hklout="unique.mtz", ref=pdb_meta, resolution=1.0).run()
        # CCP4 freerflag uses always the same pseudo-random sequence by default
        wf.freerflag(hklin="unique.mtz", hklout=free_mtz,
                     keys=("SEED" if opt.seed_freerflag else "")).run()

    # cad is skipped only when the flags come from the input data file
    # itself and no resolution cut-off was requested
    if free_mtz == opt.mtz and opt.reso is None:
        prepared_mtz = f_mtz
    else:
        prepared_mtz = "prepared.mtz"
        wf.temporary_files.add(prepared_mtz)
        wf.cad(data_in=[(f_mtz,
                         [c for c in f_mtz_meta.columns if c != free_col]),
                        (free_mtz, [free_col])],
               hklout=prepared_mtz,
               keys=["sysab_keep",  # does it matter?
                     "reso overall 1000.0 %g" % cad_reso]).run()
    # complete the flags for reflections that still have none
    freerflag_missing = wf.count_mtz_missing(prepared_mtz, free_col)
    if freerflag_missing:
        wf.freerflag(hklin=prepared_mtz, hklout="prepared2.mtz",
                     keys="COMPLETE FREE="+free_col,
                     parser=" (again, for %d refl. more)" % freerflag_missing
                    ).run()
        prepared_mtz = "prepared2.mtz"
        wf.temporary_files.add(prepared_mtz)

    ####### refinement #######
    if opt.weight:
        refmac_weight = "matrix %f" % opt.weight
    else:
        refmac_weight = "auto"
    restr_ref_keys = """\
     make newligand continue
     refinement type restrained
     weight %s
     """ % refmac_weight
    if opt.freecolumn_val:
        restr_ref_keys += "free %s\n" % opt.freecolumn_val
    refmac_labin = "%s FREE=%s" % (refmac_labin_nofree, free_col)
    comment("\nRestrained refinement, %d+%d cycles." % (opt.jelly,
                                                        opt.restr_cycles))
    # optional first run with the 'ridge distance sigma' keyword; its
    # output model is the input of the final run
    if opt.jelly:
        wf.temporary_files |= {"jelly.pdb", "jelly.mtz"}
        wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                   hklout="jelly.mtz", xyzout="jelly.pdb",
                   labin=refmac_labin, libin=opt.libin,
                   keys=restr_ref_keys+"ridge distance sigma 0.01\n"
                                       "make hydrogen no\n"
                                       "ncycle %d" % opt.jelly).run()
        comment(_refmac_rms_line(wf.jobs[-1].data))
        refmac_xyzin = "jelly.pdb"
    # final restrained refinement writing opt.hklout / opt.xyzout
    restr_job = wf.refmac5(hklin=prepared_mtz, xyzin=refmac_xyzin,
                 hklout=opt.hklout, xyzout=opt.xyzout,
                 labin=refmac_labin, libin=opt.libin,
                 keys=restr_ref_keys+("ncycle %d" % opt.restr_cycles)).run()
    comment(_refmac_rms_line(restr_job.data))
    # if that run is repeated with --from-step it's useful to compare Rfree
    if wf.from_job > 0 and wf.from_job <= len(wf.jobs): # from_job is 1-based
        prev = [j for j in wf.repl_jobs if j.name == restr_job.name]
        if prev and prev[0].data and "free_r" in prev[0].data:
            comment("\nPreviously:  R/Rfree %.4f/%.4f  Rfree change: %+.4f" % (
                    prev[0].data["overall_r"], prev[0].data["free_r"],
                    restr_job.data["free_r"] - prev[0].data["free_r"]))

    ####### check blobs and finish #######
    if restr_job.data["free_r"] <= BAD_FINAL_RFREE:
        fb_job = wf.find_blobs(opt.hklout, opt.xyzout, sigma=0.8).run()
        coot_script = _generate_scripts_and_pictures(wf, opt, fb_job.data)
        if coot_script:
            comment("\nTo see it in Coot run %s" % coot_script)
    else:
        comment("\nGiving up (Rfree > %g). No blob search." % BAD_FINAL_RFREE)
        # scripts are still written, just without blobs
        _generate_scripts_and_pictures(wf, opt, None)
Ejemplo n.º 39
0
Archivo: main.py Proyecto: ccp4/dimple
def _generate_scripts_and_pictures(wf, opt, data):
    """Write Coot helper scripts and, if requested, render blob pictures.

    The following files are always written to ``wf.output_dir``:

    * ``run-coot.py`` - uses relative paths and centers on the first blob;
    * ``blobN-coot.py`` - one per blob (at most two), with absolute paths;
    * ``coot.sh`` - one-line launcher, executable.

    Pictures are rendered only when ``opt.img_format`` is set, there is
    at least one blob and ``_check_picture_tools()`` succeeds.

    Args:
        wf: workflow object; ``output_dir``, ``coot_py()``, ``render_r3d()``
            and ``delete_files()`` are used here.
        opt: options; ``xyzout``, ``hklout`` and ``img_format`` are read.
        data: result of the blob search with "blobs" and "center" items,
            or a false value (e.g. None) if the search was not run.

    Returns:
        Path of the generated ``coot.sh`` script.

    Raises:
        workflow.JobError: re-raised when the coot job that prepares
            the pictures fails.
    """
    blobs = data["blobs"] if data else []
    # Decide only once whether pictures are to be made, so that the
    # announcement below and the actual rendering cannot disagree.
    # _check_picture_tools() is called only if there is something to draw.
    make_pictures = False
    if not blobs:
        comment("\nUnmodelled blobs not found.")
    elif opt.img_format and _check_picture_tools():
        make_pictures = True
        if len(blobs) == 1:
            comment("\nRendering density blob at (%.1f, %.1f, %.1f)" %
                    blobs[0])
        else:
            comment("\nRendering 2 largest blobs: at (%.1f, %.1f, %.1f) "
                    "and at (%.1f, %.1f, %.1f)" % (blobs[0]+blobs[1]))
    com = data and data["center"]

    # run-coot.py centers on the biggest blob. It uses relative paths -
    # it can be run only from the output directory, but is not affected
    # by moving that directory to different location.
    # There are blobN-coot.py scripts generated below with absolute paths.
    script_path = os.path.join(wf.output_dir, "run-coot.py")
    script = coots.basic_script(pdb=opt.xyzout, mtz=opt.hklout,
                                center=(blobs and blobs[0]), toward=com)
    _write_script(script_path, script, executable=True)

    # blobN-coot.py scripts, for now for not more than two blobs
    abs_dir = os.path.abspath(wf.output_dir)
    for n, blob in enumerate(blobs[:2]):
        py_path = os.path.join(wf.output_dir, "blob%d-coot.py" % (n+1))
        content = coots.basic_script(pdb=os.path.join(abs_dir, opt.xyzout),
                                     mtz=os.path.join(abs_dir, opt.hklout),
                                     center=blob, toward=com)
        _write_script(py_path, content)

    # coot.sh - one-line script for convenience
    if blobs:
        coot_args = '{out}/blob1-coot.py'.format(out=wf.output_dir)
    else:
        # Use the configured output names (not hard-coded final.mtz and
        # final.pdb), to open the files that this run actually wrote.
        coot_args = '%s %s' % (os.path.join(wf.output_dir, opt.hklout),
                               os.path.join(wf.output_dir, opt.xyzout))
    coot_sh_text = '{coot} --no-guano {args}\n'.format(coot=coots.find_path(),
                                                       args=coot_args)
    coot_sh_path = os.path.join(wf.output_dir, "coot.sh")
    _write_script(coot_sh_path, coot_sh_text, executable=True)

    if make_pictures:
        script_parts = []
        basenames = []
        # as a workaround for buggy coot the maps are reloaded for each blob
        for n, blob in enumerate(blobs[:2]):
            script_parts.append(coots.basic_script(pdb=opt.xyzout,
                                                   mtz=opt.hklout,
                                                   center=blob, toward=com))
            rs, names = coots.r3d_script(blob, com, blobname="blob%s" % (n+1))
            script_parts.append(rs)
            basenames.extend(names)
        coot_job = wf.coot_py(''.join(script_parts))
        try:
            coot_job.run()
        except workflow.JobError:
            # check for a possible cause to hint the user
            # (possible workaround: change $HOME to non-existing directory)
            if utils.silently_run(coot_job.args, cwd=wf.output_dir)[0] != 0:
                put_error("coot fails with options: --no-graphics --python",
                          comment="It happens when scripts in .coot or "
                                  ".coot-preferences are not compatible\n"
                                  "with the --no-graphics mode.")
            raise
        for n, basename in enumerate(basenames):
            job = wf.render_r3d(basename, img_format=opt.img_format)
            # only every third job reports progress normally
            if n % 3 == 0:
                job.run()
            else:  # minimal output
                job.run(show_progress=False, new_line=False)
        wf.delete_files([name+".r3d" for name in basenames])
    return coot_sh_path