Example #1
0
    def __init__(self, ident, pdb_parent, seq_record=None, description=None):
        """Initialize a chain entry that remembers which PDB structure it came from.

        Args:
            ident: Identifier of this chain, forwarded to ``Object.__init__`` as ``id``
            pdb_parent: The parent PDB this chain belongs to; also used in the default description
            seq_record: Optional sequence record to attach to the chain
            description: Optional description; when empty, a default one is generated

        """
        Object.__init__(self, id=ident, description=description)

        self.pdb_parent = pdb_parent
        self.seq_record = seq_record

        # A description was supplied, nothing left to do
        if self.description:
            return

        # Otherwise fall back to an auto-generated description
        self.description = 'Chain {} from PDB parent {}'.format(self.id, self.pdb_parent)
Example #2
0
    def __init__(self,
                 atlas_name,
                 root_dir,
                 reference_gempro,
                 reference_genome_path=None,
                 description=None):
        """Prepare a GEM-PRO model for ATLAS analysis

        Args:
            atlas_name (str): Name of your ATLAS project
            root_dir (str): Path to where the folder named after ``atlas_name`` will be created.
            reference_gempro (GEMPRO): GEM-PRO model to use as the reference genome
            reference_genome_path (str): Path to reference genome FASTA file. If omitted, a genome path already
                set on ``reference_gempro`` is kept; if there is none, the representative sequences of
                ``reference_gempro`` are written to a file and that file is used.
            description (str): Optional string to describe your project

        Raises:
            ValueError: If ``reference_gempro`` has no associated model and no genes

        """
        Object.__init__(self, id=atlas_name, description=description)

        # Create directories
        # The private attribute is initialized first, then the public ``root_dir`` is assigned
        self._root_dir = None
        self.root_dir = root_dir

        self.strains = DictList()
        self.df_orthology_matrix = pd.DataFrame()
        # Mark if the orthology matrix has gene IDs (thus we need to retrieve seqs from the genome file) or if
        # it is in the orthology matrix itself
        self._orthology_matrix_has_sequences = False

        # Load the GEM-PRO (could be a model, could just be a list of genes)
        self.reference_gempro = reference_gempro
        if reference_genome_path:
            # An explicitly provided genome file always wins
            self.reference_gempro.genome_path = reference_genome_path
            # TODO: must also check if reference_genome_path gene IDs can be matched to the reference_gempro
        elif not self.reference_gempro.genome_path:
            # No genome file anywhere - write all sequences and use that.
            # (If the GEM-PRO already has a genome path it is left untouched rather than reset to None.)
            self.reference_gempro.genome_path = self.reference_gempro.write_representative_sequences_file(
                outname=self.reference_gempro.id)

        # Also create an "empty" copy of the reference GEM-PRO
        self._empty_reference_gempro = None
        if self.reference_gempro.model:
            # If there is a SBML model associated with the GEMPRO, copy that model
            self._empty_reference_gempro = GEMPRO(
                gem_name='Copied reference GEM-PRO',
                gem=self.reference_gempro.model.copy())
            # Reset the GenePro attributes
            for gene in self._empty_reference_gempro.genes:
                gene.reset_protein()
        else:
            # Otherwise, just copy the list of gene IDs over
            strain_genes = [gene.id for gene in self.reference_gempro.genes]
            if not strain_genes:
                raise ValueError(
                    'GEM-PRO has no genes, unable to run multi-strain analysis'
                )
            self._empty_reference_gempro = GEMPRO(
                gem_name='Copied reference GEM-PRO', genes_list=strain_genes)
Example #3
0
    def get_dict_with_chain(self,
                            chain,
                            only_keys=None,
                            chain_keys=None,
                            exclude_attributes=None,
                            df_format=False):
        """get_dict method which incorporates attributes found in a specific chain. Does not modify any attributes
        of the StructProp itself, nor the lists passed in as arguments.

        Args:
            chain: ID of the chain to look up in ``self.chains``
            only_keys: Attribute name or list of names of this object to return. Everything in ``__dict__`` is
                considered if not specified.
            chain_keys: Attribute names of the chain to include. If not specified, every chain attribute whose
                name is not already present in the StructProp part of the result is included.
            exclude_attributes: Attribute name or list of names to leave out. ``mapped_chains`` and ``chains``
                are always excluded.
            df_format: Passed through unchanged to the ``get_dict`` calls of both this object and the chain

        Returns:
            dict: attributes of StructProp + the chain specified

        """

        # Choose attributes to return, return everything in the object if a list is not specified.
        # A copy is taken because keys are removed below and force_list may hand back the caller's own list.
        if only_keys:
            keys = list(ssbio.utils.force_list(only_keys))
        else:
            keys = list(self.__dict__.keys())

        # Remove keys you don't want returned (again working on a copy of the caller's argument)
        if exclude_attributes:
            exclude_attributes = list(ssbio.utils.force_list(exclude_attributes))
            for x in exclude_attributes:
                if x in keys:
                    keys.remove(x)
        else:
            exclude_attributes = []

        # The chain containers themselves are never part of the result
        exclude_attributes.extend(['mapped_chains', 'chains'])

        struct_dict = Object.get_dict(self,
                                      only_attributes=keys,
                                      exclude_attributes=exclude_attributes,
                                      df_format=df_format)
        # Shallow copy so that the update below does not touch the dict returned by get_dict
        final_dict = dict(struct_dict.items())

        chain_prop = self.chains.get_by_id(chain)
        # Filter out keys that show up in StructProp
        if not chain_keys:
            chain_keys = [
                x for x in chain_prop.get_dict().keys() if x not in final_dict
            ]

        chain_dict = chain_prop.get_dict(only_attributes=chain_keys,
                                         df_format=df_format)
        final_dict.update(chain_dict)

        return final_dict
Example #4
0
class TestObject(unittest.TestCase):
    """Unit tests for Object"""

    @classmethod
    def setUpClass(cls):
        """Create the Object instance shared by all tests in this class.

        Note that the instance is created once and shared: ``test_update`` mutates it, so the other tests
        only rely on attributes that ``test_update`` leaves alone (``id``) or merely on attribute existence.
        """
        cls.ob = Object(id='idtester', description='nametester')

    def test_attr(self):
        """The constructor arguments are exposed as attributes."""
        self.assertTrue(hasattr(self.ob, 'id'))
        self.assertTrue(hasattr(self.ob, 'description'))

    def test_update(self):
        """``update`` honours ``overwrite`` and ``only_keys``."""
        new_stuff = {
            'newkey': 'newvalue',
            'dontadd': 'dontadd',
            'description': 'newdescription'
        }

        # Without overwrite: new attributes are added, existing ones keep their value
        self.ob.update(newdata=new_stuff,
                       overwrite=False,
                       only_keys=['newkey', 'description'])
        self.assertTrue(hasattr(self.ob, 'newkey'))
        self.assertEqual('nametester', self.ob.description)

        # With overwrite, restricted to a single key
        self.ob.update(newdata=new_stuff,
                       overwrite=True,
                       only_keys=['description'])
        self.assertEqual('newdescription', self.ob.description)

        # With overwrite and no key restriction, everything in new_stuff is applied
        self.ob.update(newdata=new_stuff, overwrite=True)
        self.assertEqual('newdescription', self.ob.description)
        self.assertEqual('newvalue', self.ob.newkey)
        self.assertEqual('dontadd', self.ob.dontadd)

    def test_get_dict(self):
        """``get_dict`` accepts a single attribute name and returns only that attribute."""
        gotdict = self.ob.get_dict(only_attributes='id')
        self.assertEqual(gotdict, {'id': 'idtester'})
Example #5
0
    def __init__(self,
                 ident,
                 description=None,
                 chains=None,
                 mapped_chains=None,
                 is_experimental=False,
                 structure_path=None,
                 file_type=None):
        """Set up a StructProp: identity, chain bookkeeping and, optionally, its structure file.

        Args:
            ident (str): Unique identifier for this structure
            description (str): Optional human-readable description
            chains (str, list): Chain ID or list of IDs
            mapped_chains (str, list): A chain ID or IDs to indicate what chains should be analyzed
            is_experimental (bool): Flag to indicate if structure is an experimental or computational model
            structure_path (str): Path to structure file
            file_type (str): Type of structure file - ``pdb``, ``pdb.gz``, ``mmcif``, ``cif``, ``cif.gz``,
                ``xml.gz``, ``mmtf``, ``mmtf.gz``

        """
        Object.__init__(self, id=ident, description=description)

        self.is_experimental = is_experimental

        # -- Chains --------------------------------------------------------------------------------------
        # ``chains`` holds ChainProp objects in a DictList; running self.parse_structure() parses and
        # stores every chain here, while ``mapped_chains`` (below) tracks only the chains of interest
        self.chains = DictList()
        if chains:
            self.add_chain_ids(chains)

        # Ordered list of mapped chain IDs, e.g. coming from BLAST or the best_structures API
        self.mapped_chains = []
        if mapped_chains:
            self.add_mapped_chain_ids(mapped_chains)

        # -- Structure file ------------------------------------------------------------------------------
        # Start from an "unset" state, then let load_structure_path fill things in if a path was given
        self._structure_dir = None
        self.structure_file = None
        self.file_type = file_type
        if structure_path:
            self.load_structure_path(structure_path, file_type)
Example #6
0
    def __init__(self,
                 ident,
                 description=None,
                 chains=None,
                 mapped_chains=None,
                 is_experimental=False,
                 structure_path=None,
                 file_type=None):
        """Set up a structure object: identity, chain bookkeeping, parse state and structure file.

        Args:
            ident: Identifier of this structure, forwarded to ``Object.__init__`` as ``id``
            description: Optional description, forwarded to ``Object.__init__``
            chains: Chain ID(s) to register through ``add_chain_ids``; skipped when empty
            mapped_chains: Chain ID(s) to register through ``add_mapped_chain_ids``; skipped when empty
            is_experimental: Stored as-is on ``is_experimental``
            structure_path: If given, handed to ``load_structure_path`` together with ``file_type``
            file_type: Type of the structure file

        """
        Object.__init__(self, id=ident, description=description)

        self.is_experimental = is_experimental
        """bool: Flag to note if this structure is an experimental model or a homology model"""

        # -- Chains --------------------------------------------------------------------------------------
        # ``chains`` holds ChainProp objects in a DictList; running self.parse_structure() parses and
        # stores every chain here, while ``mapped_chains`` (below) tracks only the chains of interest
        self.chains = DictList()
        """DictList: A DictList of chains have their sequence stored in them, along with residue-specific"""
        if chains:
            self.add_chain_ids(chains)

        # Ordered list of mapped chain IDs, e.g. coming from BLAST or the best_structures API
        self.mapped_chains = []
        """list: A simple list of chain IDs (strings) that will be used to subset analyses"""
        if mapped_chains:
            self.add_mapped_chain_ids(mapped_chains)

        # XTODO: rename to sequence_parsed or something similar
        self.parsed = False
        """bool: Simple flag to track if this structure has had its structure + chain sequences parsed"""

        # -- Structure file ------------------------------------------------------------------------------
        # Start from an "unset" state, then let load_structure_path fill things in if a path was given
        self._structure_dir = None
        self.file_type = file_type
        """str: Type of structure file"""
        self.structure_file = None
        """str: Name of the structure file"""
        if structure_path:
            self.load_structure_path(structure_path, file_type)

        # Nothing is held in memory right after construction
        self.structure = None
        """Structure: Biopython Structure object, only used if ``store_in_memory`` option of ``parse_structure`` is set to True"""
Example #7
0
    def __init__(self,
                 ident,
                 subunits,
                 description=None,
                 root_dir=None,
                 pdb_file_type='mmtf'):
        """Set up a complex from its subunit composition.

        Args:
            ident: Identifier of this complex, forwarded to ``Object.__init__`` as ``id``
            subunits (dict): Subunit composition, stored on ``subunit_dict``
            description: Optional description, forwarded to ``Object.__init__``
            root_dir: Optional root directory; only assigned to ``root_dir`` when non-empty
            pdb_file_type (str): File type for files downloaded from the PDB

        """
        Object.__init__(self, id=ident, description=description)

        # Private state, filled in later
        self._oligomeric_state = None
        self._subunits = None

        self.subunit_dict = subunits
        """dict: Subunit composition defined as ``{protein_subunit_id: number_of_times_used_in_complex}``"""

        self.pdb_file_type = pdb_file_type
        """str: ``pdb``, ``pdb.gz``, ``mmcif``, ``cif``, ``cif.gz``, ``xml.gz``, ``mmtf``, ``mmtf.gz`` - choose a file 
        type for files downloaded from the PDB"""

        # Directory handling: the private attribute always exists, the public one is only assigned on request
        self._root_dir = None
        if root_dir:
            self.root_dir = root_dir
Example #8
0
 def save_json(self, outfile, compression=False):
     """Forward to ``Object.save_json`` with the same ``outfile`` and ``compression`` arguments."""
     options = dict(outfile=outfile, compression=compression)
     Object.save_json(self, **options)
Example #9
0
 def __json_encode__(self):
     """Return the JSON-encodable representation produced by ``Object.__json_encode__``.

     The delegated result is returned to the caller instead of being discarded; without the ``return``
     this method would always yield ``None``.
     NOTE(review): presumably consumed by a JSON encoder hook (e.g. json_tricks) that expects the
     attributes to encode as the return value - confirm against ``Object.__json_encode__``.
     """
     return Object.__json_encode__(self)
Example #10
0
 def save_pickle(self, outfile, protocol=2):
     """Forward to ``Object.save_pickle`` with the same ``outfile`` and ``protocol`` arguments."""
     options = dict(outfile=outfile, protocol=protocol)
     Object.save_pickle(self, **options)
Example #11
0
 def save_dataframes(self, outdir, prefix='df_'):
     """Forward to ``Object.save_dataframes`` with the same ``outdir`` and ``prefix`` arguments."""
     options = dict(outdir=outdir, prefix=prefix)
     Object.save_dataframes(self, **options)
Example #12
0
 def update(self, newdata, overwrite=False, only_keys=None):
     """Forward to ``Object.update`` with the same ``newdata``, ``overwrite`` and ``only_keys`` arguments."""
     options = dict(newdata=newdata, overwrite=overwrite, only_keys=only_keys)
     Object.update(self, **options)
Example #13
0
 def __repr__(self):
     """Return the representation produced by ``Object.__repr__``."""
     representation = Object.__repr__(self)
     return representation
Example #14
0
 def __str__(self):
     """Return the string form produced by ``Object.__str__``."""
     text = Object.__str__(self)
     return text
Example #15
0
 def setUpClass(self):
     """Create the ``Object`` fixture and store it on ``ob``."""
     fixture = Object(id='idtester', description='nametester')
     self.ob = fixture