Example #1
0
def create_constraints_file(preminimization_log, outfile_path):
    '''Write a constraints file derived from a preminimization log.

    Every log line beginning with "c-alpha" is split on whitespace and turned into an
    "AtomPair CA ... CA ... HARMONIC ..." record built from fields 5, 7, 9 and 12 (0-based).
    The records are newline-joined and written to outfile_path, which is then returned.
    This does the work of the convert_to_cst_file.sh script in the Rosetta repository.
    '''
    template = "AtomPair CA %s CA %s HARMONIC %s %s"
    log_lines = read_file(preminimization_log).split('\n')
    # Tokenise only the lines of interest; all other log output is ignored.
    tokenised = (entry.split() for entry in log_lines if entry.startswith("c-alpha"))
    records = [
        template % (fields[5], fields[7], fields[9], fields[12])
        for fields in tokenised
    ]
    write_file(outfile_path, '\n'.join(records))
    return outfile_path
Example #2
0
def create_constraints_file(preminimization_log, outfile_path):
    '''Convert the "c-alpha" lines of a preminimization log into a constraints file.

    This does the work of the convert_to_cst_file.sh script in the Rosetta repository: each
    matching line is whitespace-split and its fields 5, 7, 9 and 12 (0-based) are placed into an
    "AtomPair CA ... CA ... HARMONIC ..." record. The records are written, one per line, to
    outfile_path. Returns outfile_path.
    '''
    cst_records = []
    for raw_line in read_file(preminimization_log).split('\n'):
        # Only the c-alpha lines of the log carry constraint data.
        if not raw_line.startswith("c-alpha"):
            continue
        tokens = raw_line.split()
        selected = (tokens[5], tokens[7], tokens[9], tokens[12])
        cst_records.append("AtomPair CA %s CA %s HARMONIC %s %s" % selected)
    write_file(outfile_path, '\n'.join(cst_records))
    return outfile_path
Example #3
0
def update_pdbs_json():
    '''Rebuild pdbs.json so that each entry carries per-chain sequences and types.

    Reads ../json/pdbs.json and, for every key (asserted to be four characters long), loads
    ../pdbs/<KEY>.pdb with the key upper-cased. Each entry gains a 'Chains' dictionary mapping
    chain IDs to their 'Sequence' and 'Type'. Entries are re-keyed by the upper-cased ID and the
    result is written to ../json/pdbs.json.new; the input file is left untouched.
    '''
    source_path = os.path.join('..', 'json', 'pdbs.json')
    target_path = os.path.join('..', 'json', 'pdbs.json.new')

    updated = {}
    for pdb_id, record in json.loads(read_file(source_path)).items():
        assert(len(pdb_id) == 4)
        upper_id = pdb_id.upper()
        structure = PDB(read_file(os.path.join('..', 'pdbs', upper_id + '.pdb')))

        # Gather every chain named by chain_types, seqres_chain_order, or atom_sequences.
        all_chains = set(structure.chain_types.keys())
        all_chains = all_chains.union(set(structure.seqres_chain_order))
        all_chains = all_chains.union(set(structure.atom_sequences.keys()))

        # A chain missing from atom_sequences is stored with the string 'None' as its sequence.
        record['Chains'] = {
            chain_id: dict(
                Sequence=str(structure.atom_sequences.get(chain_id)),
                Type=structure.chain_types.get(chain_id),
            )
            for chain_id in all_chains
        }
        updated[upper_id] = record

    write_file(target_path, json.dumps(updated, indent=4, sort_keys=True))
Example #4
0
def update_pdbs_json():
    '''This function was used to update the pdbs.json file to include chain sequences and types.

    Reads ../json/pdbs.json, loads ../pdbs/<ID>.pdb for each (upper-cased) four-character key,
    adds a 'Chains' dictionary holding the 'Sequence' and 'Type' of every chain, and writes the
    re-keyed data to ../json/pdbs.json.new. The original pdbs.json is not modified.
    '''
    pdb_data = {}
    pdb_data_ = json.loads(read_file(os.path.join('..', 'json', 'pdbs.json')))
    # .items() is used rather than .iteritems() so that this runs on both Python 2 and Python 3.
    for k, v in pdb_data_.items():
        assert(len(k) == 4)
        newk = k.upper()
        pdb = PDB(read_file(os.path.join('..', 'pdbs', newk + '.pdb')))
        # Take the union of the chains named by chain_types, seqres_chain_order, and atom_sequences.
        chain_ids = set(pdb.chain_types.keys()).union(set(pdb.seqres_chain_order)).union(set(pdb.atom_sequences.keys()))
        v['Chains'] = dict.fromkeys(chain_ids)
        for chain_id in chain_ids:
            # A chain missing from atom_sequences is recorded with the string 'None' as its sequence.
            v['Chains'][chain_id] = dict(
                Sequence = str(pdb.atom_sequences.get(chain_id)),
                Type = pdb.chain_types.get(chain_id),
            )
        pdb_data[newk] = v
    write_file(os.path.join('..', 'json', 'pdbs.json.new'), json.dumps(pdb_data, indent = 4, sort_keys=True))
Example #5
0
    for chain_id, sequence in stripped_pdb.atom_sequences.iteritems():
        assert(len(sequence) > 0)

    # Check for CSE and MSE
    try:
        if 'CSE' in stripped_pdb.residue_types:
            raise Exception('This case contains a CSE residue which may (or may not) cause an issue with Rosetta depending on the version.')
        elif 'MSE' in stripped_pdb.residue_types:
            raise Exception('This case contains an MSE residue which may (or may not) cause an issue with Rosetta depending on the version.')
            # It looks like MSE (and CSE?) may now be handled - https://www.rosettacommons.org/content/pdb-files-rosetta-format
    except Exception, e:
        print('%s: %s, chain %s' % (str(e), str(stripped_pdb.pdb_id), chain))

    # Turn the lines array back into a valid PDB file
    if not(skip_if_exists) or not(os.path.exists(stripped_pdb_path)):
        write_file(stripped_pdb_path, '\n'.join(stripped_pdb.lines))

    # Create the mapping between PDB and Rosetta residue numbering
    # Note: In many Rosetta protocols, '-ignore_unrecognized_res' and '-ignore_zero_occupancy false' are used to allow
    # Rosetta to work with structures with missing data and non-canonicals. In those cases, we should supply both flags
    # in the string below. Since protocol 16 only uses '-ignore_unrecognized_res', we only use that flag below as otherwise
    # we could break the mapping.
    rosetta_scripts_bin = os.path.join(settings['local_rosetta_bin'], 'rosetta_scripts%s' % settings['rosetta_binary_type'])
    rosetta_database_path = settings['local_rosetta_db_dir']
    if not os.path.exists(rosetta_scripts_bin):
        raise Exception('The Rosetta scripts executable "{0}" could not be found. Please check your configuration file.'.format(rosetta_database_path))
    if not os.path.exists(rosetta_database_path):
        raise Exception('The path to the Rosetta database "{0}" could not be found. Please check your configuration file.'.format(rosetta_database_path))
    stripped_pdb.construct_pdb_to_rosetta_residue_map(rosetta_scripts_bin,rosetta_database_path, extra_command_flags = '-ignore_unrecognized_res')
    atom_to_rosetta_residue_map = stripped_pdb.get_atom_sequence_to_rosetta_json_map()
    rosetta_to_atom_residue_map = stripped_pdb.get_rosetta_sequence_to_atom_json_map()
Example #6
0
    for chain_id, sequence in stripped_pdb.atom_sequences.iteritems():
        assert(len(sequence) > 0)

    # Check for CSE and MSE
    try:
        if 'CSE' in stripped_pdb.residue_types:
            raise Exception('This case contains a CSE residue which may (or may not) cause an issue with Rosetta depending on the version.')
        elif 'MSE' in stripped_pdb.residue_types:
            raise Exception('This case contains an MSE residue which may (or may not) cause an issue with Rosetta depending on the version.')
            # It looks like MSE (and CSE?) may now be handled - https://www.rosettacommons.org/content/pdb-files-rosetta-format
    except Exception, e:
        print('%s: %s, chain %s' % (str(e), str(stripped_pdb.pdb_id), chain))

    # Turn the lines array back into a valid PDB file
    if not(skip_if_exists) or not(os.path.exists(stripped_pdb_path)):
        write_file(stripped_pdb_path, '\n'.join(stripped_pdb.lines))

    # Create the mapping between PDB and Rosetta residue numbering
    # Note: In many Rosetta protocols, '-ignore_unrecognized_res' and '-ignore_zero_occupancy false' are used to allow
    # Rosetta to work with structures with missing data and non-canonicals. In those cases, we should supply both flags
    # in the string below. Since protocol 16 only uses '-ignore_unrecognized_res', we only use that flag below as otherwise
    # we could break the mapping.
    rosetta_scripts_bin = os.path.join(settings['local_rosetta_bin'], 'rosetta_scripts%s' % settings['rosetta_binary_type'])
    rosetta_database_path = settings['local_rosetta_db_dir']
    if not os.path.exists(rosetta_scripts_bin):
        raise Exception('The Rosetta scripts executable "{0}" could not be found. Please check your configuration file.'.format(rosetta_database_path))
    if not os.path.exists(rosetta_database_path):
        raise Exception('The path to the Rosetta database "{0}" could not be found. Please check your configuration file.'.format(rosetta_database_path))
    stripped_pdb.construct_pdb_to_rosetta_residue_map(rosetta_scripts_bin,rosetta_database_path, extra_command_flags = '-ignore_unrecognized_res')
    atom_to_rosetta_residue_map = stripped_pdb.get_atom_sequence_to_rosetta_json_map()
    rosetta_to_atom_residue_map = stripped_pdb.get_rosetta_sequence_to_atom_json_map()
Example #7
0
def create_input_files(job_dict,
                       settings,
                       pdb_dir_path,
                       pdb_data_dir,
                       mutfile_data_dir,
                       keypair,
                       dataset_cases,
                       skip_if_exists=False):
    '''Create the stripped PDB files and the mutfiles for the DDG step. Mutfiles are created at this point as we need the
    original PDB to generate the residue mapping.

    :param job_dict: Dictionary updated in place; it gains one entry, keyed by
        os.path.join(task_subfolder, '_'.join(keypair)), holding the input file list.
    :param settings: Mapping providing 'local_rosetta_bin', 'rosetta_binary_type',
        'local_rosetta_db_dir' and 'output_dir'.
    :param pdb_dir_path: Path of the PDB file to read (passed to PDB.from_filepath).
    :param pdb_data_dir: Directory receiving the stripped PDB file and the two residue-map JSON files.
    :param mutfile_data_dir: Directory receiving one '<RecordID>.mutfile' per dataset case.
    :param keypair: Sequence of strings whose first two items are the PDB ID and the chain ID.
    :param dataset_cases: Iterable of dicts with 'PDBFileID', 'Mutations' and 'RecordID' keys.
    :param skip_if_exists: When true, an already-existing stripped PDB file is not rewritten.
    :raises Exception: If no ATOM lines remain after stripping, or if the Rosetta scripts executable
        or the Rosetta database cannot be found.
    :raises AssertionError: If the stripped structure does not contain exactly the requested chain
        with a non-empty sequence, or Rosetta maps no residues for it.

    Errors raised while validating mutations or writing mutfiles are printed (with a traceback)
    rather than propagated, so the job entry is still registered in that case.
    '''

    # Read PDB
    pdb_id = keypair[0]
    chain = keypair[1]
    pdb = PDB.from_filepath(pdb_dir_path)
    stripped_pdb_path = os.path.join(pdb_data_dir, '%s_%s.pdb' % (pdb_id, chain))

    # Strip the PDB to the list of chains. This also renumbers residues in the PDB for Rosetta.
    chains = [chain]
    pdb.strip_to_chains(chains)
    pdb.strip_HETATMs()
    stripped_pdb = PDB('\n'.join(pdb.lines))

    # Check to make sure that we haven't stripped all the ATOM lines
    if not any(line[0:4] == "ATOM" for line in stripped_pdb.lines):
        raise Exception("No ATOM lines remain in the stripped PDB file %s." %
                        stripped_pdb_path)

    # Assert that there are no empty sequences
    assert sorted(stripped_pdb.atom_sequences.keys()) == sorted(chains)
    for sequence in stripped_pdb.atom_sequences.values():
        assert len(sequence) > 0

    # Check for CSE and MSE. This is only a warning: the case is still processed.
    # It looks like MSE (and CSE?) may now be handled - https://www.rosettacommons.org/content/pdb-files-rosetta-format
    residue_warning = None
    if 'CSE' in stripped_pdb.residue_types:
        residue_warning = 'This case contains a CSE residue which may (or may not) cause an issue with Rosetta depending on the version.'
    elif 'MSE' in stripped_pdb.residue_types:
        residue_warning = 'This case contains an MSE residue which may (or may not) cause an issue with Rosetta depending on the version.'
    if residue_warning is not None:
        print('%s: %s, chain %s' % (residue_warning, str(stripped_pdb.pdb_id), chain))

    # Turn the lines array back into a valid PDB file
    if not skip_if_exists or not os.path.exists(stripped_pdb_path):
        write_file(stripped_pdb_path, '\n'.join(stripped_pdb.lines))

    # Create the mapping between PDB and Rosetta residue numbering
    # Note: In many Rosetta protocols, '-ignore_unrecognized_res' and '-ignore_zero_occupancy false' are used to allow
    # Rosetta to work with structures with missing data and non-canonicals. In those cases, we should supply both flags
    # in the string below. Since protocol 16 only uses '-ignore_unrecognized_res', we only use that flag below as otherwise
    # we could break the mapping.
    rosetta_scripts_bin = os.path.join(
        settings['local_rosetta_bin'],
        'rosetta_scripts%s' % settings['rosetta_binary_type'])
    rosetta_database_path = settings['local_rosetta_db_dir']
    if not os.path.exists(rosetta_scripts_bin):
        # Report the executable path that was actually checked, not the database path.
        raise Exception(
            'The Rosetta scripts executable "{0}" could not be found. Please check your configuration file.'
            .format(rosetta_scripts_bin))
    if not os.path.exists(rosetta_database_path):
        raise Exception(
            'The path to the Rosetta database "{0}" could not be found. Please check your configuration file.'
            .format(rosetta_database_path))
    stripped_pdb.construct_pdb_to_rosetta_residue_map(
        rosetta_scripts_bin,
        rosetta_database_path,
        extra_command_flags='-ignore_unrecognized_res')
    atom_to_rosetta_residue_map = stripped_pdb.get_atom_sequence_to_rosetta_json_map()
    rosetta_to_atom_residue_map = stripped_pdb.get_rosetta_sequence_to_atom_json_map()

    # Save the PDB <-> Rosetta residue mappings to disk
    write_file(
        os.path.join(pdb_data_dir,
                     '%s_%s.rosetta2pdb.resmap.json' % (pdb_id, chain)),
        rosetta_to_atom_residue_map)
    write_file(
        os.path.join(pdb_data_dir,
                     '%s_%s.pdb2rosetta.resmap.json' % (pdb_id, chain)),
        atom_to_rosetta_residue_map)

    # Assert that there are no empty sequences in the Rosetta-processed PDB file.
    # The first character of each mapped value is compared against the chain ID.
    mapped_residues = json.loads(rosetta_to_atom_residue_map)
    for chain_id in chains:
        assert any(z[0] == chain_id for z in mapped_residues.values())

    # Check that the mutated positions exist and that the wild-type matches the PDB
    try:
        for dataset_case in dataset_cases:
            assert dataset_case['PDBFileID'] == pdb_id

            # Note: I removed a hack here for 1AJ3->1U5P mapping
            # The JSON file does not have the residue IDs in PDB format (5 characters including insertion code) so we need to repad them for the mapping to work
            pdb_mutations = [
                ChainMutation(mutation['WildTypeAA'],
                              PDB.ResidueID2String(mutation['ResidueID']),
                              mutation['MutantAA'],
                              Chain=mutation['Chain'])
                for mutation in dataset_case['Mutations']
            ]
            stripped_pdb.validate_mutations(pdb_mutations)

            # Map the PDB mutations to Rosetta numbering which is used by the mutfile format
            rosetta_mutations = stripped_pdb.map_pdb_residues_to_rosetta_residues(
                pdb_mutations)
            mapping_incomplete = (
                len(rosetta_mutations) != len(pdb_mutations)
                or any(m.ResidueID is None for m in rosetta_mutations))
            if mapping_incomplete:
                raise Exception(
                    'An error occurred in the residue mapping code for DDG case: %s, %s'
                    % (pdb_id, pdb_mutations))

            # Create the mutfile
            mutfile = Mutfile.from_mutagenesis(rosetta_mutations)
            mutfilename = os.path.join(
                mutfile_data_dir, '%d.mutfile' % (dataset_case['RecordID']))
            if os.path.exists(mutfilename):
                raise Exception(
                    '%s already exists. Check that the RecordIDs in the JSON file are all unique.'
                    % mutfilename)
            write_file(mutfilename, str(mutfile))
    except Exception as e:
        # Best effort: report the failure and carry on so that the job entry is still registered.
        print(str(e))
        print(traceback.format_exc())

    # Set up --in:file:l parameter
    # NOTE(review): task_subfolder is not defined in this function; it is expected to exist at module level.
    pdb_relpath = os.path.relpath(stripped_pdb_path, settings['output_dir'])
    job_dict[os.path.join(
        task_subfolder,
        '_'.join(keypair))] = dict(input_file_list=[pdb_relpath])
    # Emit a progress dot for this case.
    sys.stdout.write('.')
    sys.stdout.flush()