def __init__(self, ident, pdb_parent, seq_record=None, description=None):
    """Initialize a chain-level object linked to its parent structure by ``pdb_parent``."""
    Object.__init__(self, id=ident, description=description)

    self.pdb_parent = pdb_parent
    self.seq_record = seq_record

    if not self.description:
        self.description = 'Chain {} from PDB parent {}'.format(self.id, self.pdb_parent)
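# Usage sketch (not part of the original source). Assumes this constructor belongs to
# ssbio's ChainProp class; the import path below is an assumption.
from ssbio.protein.structure.chainprop import ChainProp

chain_a = ChainProp(ident='A', pdb_parent='1abc')
print(chain_a.description)  # -> 'Chain A from PDB parent 1abc'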
def __init__(self, atlas_name, root_dir, reference_gempro, reference_genome_path=None, description=None):
    """Prepare a GEM-PRO model for ATLAS analysis.

    Args:
        atlas_name (str): Name of your ATLAS project
        root_dir (str): Path to where the folder named after ``atlas_name`` will be created
        reference_gempro (GEMPRO): GEM-PRO model to use as the reference genome
        reference_genome_path (str): Path to reference genome FASTA file
        description (str): Optional string to describe your project

    """
    Object.__init__(self, id=atlas_name, description=description)

    # Create directories
    self._root_dir = None
    self.root_dir = root_dir

    self.strains = DictList()
    self.df_orthology_matrix = pd.DataFrame()

    # Mark if the orthology matrix contains gene IDs only (in which case sequences must be
    # retrieved from the genome file) or if the sequences are stored in the matrix itself
    self._orthology_matrix_has_sequences = False

    # Load the GEM-PRO (could be a model, could just be a list of genes)
    # Check if there is a genome file associated with this model - if not, write all sequences and use that
    self.reference_gempro = reference_gempro
    if not reference_genome_path and not self.reference_gempro.genome_path:
        self.reference_gempro.genome_path = self.reference_gempro.write_representative_sequences_file(
            outname=self.reference_gempro.id)
    elif reference_genome_path:
        # Only overwrite an existing genome path if a new one was explicitly provided
        self.reference_gempro.genome_path = reference_genome_path
        # TODO: must also check if reference_genome_path gene IDs can be matched to the reference_gempro

    # Also create an empty copy of the reference GEM-PRO to use as a template for strain models
    self._empty_reference_gempro = None
    if self.reference_gempro.model:
        # If there is a SBML model associated with the GEM-PRO, copy that model
        self._empty_reference_gempro = GEMPRO(gem_name='Copied reference GEM-PRO',
                                              gem=self.reference_gempro.model.copy())
        # Reset the GenePro attributes
        for x in self._empty_reference_gempro.genes:
            x.reset_protein()
    else:
        # Otherwise, just copy the list of genes over and rename the IDs
        strain_genes = [x.id for x in self.reference_gempro.genes]
        if len(strain_genes) == 0:
            raise ValueError('GEM-PRO has no genes, unable to run multi-strain analysis')
        self._empty_reference_gempro = GEMPRO(gem_name='Copied reference GEM-PRO',
                                              genes_list=strain_genes)
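# Usage sketch (not part of the original source). Module paths, gene IDs, and file
# locations below are assumptions for illustration only.
from ssbio.pipeline.gempro import GEMPRO
from ssbio.pipeline.atlas import ATLAS

# A reference GEM-PRO built from a plain list of gene IDs (no SBML model attached)
reference = GEMPRO(gem_name='my_reference', genes_list=['b0001', 'b0002', 'b0003'])

# Point reference_genome_path at a FASTA file of the reference gene sequences; if it is
# omitted and the GEM-PRO has no genome file, the constructor writes one from the
# representative sequences instead.
my_atlas = ATLAS(atlas_name='my_atlas_project',
                 root_dir='/path/to/projects',          # must be an existing directory
                 reference_gempro=reference,
                 reference_genome_path='/path/to/my_reference.faa',
                 description='Multi-strain comparison against my_reference')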
def get_dict_with_chain(self, chain, only_keys=None, chain_keys=None, exclude_attributes=None, df_format=False):
    """get_dict method which incorporates attributes found in a specific chain.

    Does not overwrite any attributes in the original StructProp.

    Args:
        chain (str): Chain ID whose attributes should be merged into the returned dictionary
        only_keys (list): StructProp attributes to return; all attributes are returned if not specified
        chain_keys (list): Chain-level attributes to include; defaults to all chain attributes
            that do not collide with StructProp attributes
        exclude_attributes (list): Attributes to exclude from the returned dictionary
        df_format (bool): If True, values are formatted so they can be stored in a Pandas DataFrame

    Returns:
        dict: Attributes of the StructProp plus attributes of the chain specified

    """
    # Choose attributes to return - return everything in the object if a list is not specified
    if not only_keys:
        keys = list(self.__dict__.keys())
    else:
        keys = ssbio.utils.force_list(only_keys)

    # Remove keys you don't want returned
    if exclude_attributes:
        exclude_attributes = ssbio.utils.force_list(exclude_attributes)
        for x in exclude_attributes:
            if x in keys:
                keys.remove(x)
    else:
        exclude_attributes = []
    exclude_attributes.extend(['mapped_chains', 'chains'])

    final_dict = Object.get_dict(self, only_attributes=keys, exclude_attributes=exclude_attributes,
                                 df_format=df_format)

    chain_prop = self.chains.get_by_id(chain)
    # Filter out chain keys that already show up in the StructProp
    if not chain_keys:
        chain_keys = [x for x in chain_prop.get_dict().keys() if x not in final_dict]

    chain_dict = chain_prop.get_dict(only_attributes=chain_keys, df_format=df_format)
    final_dict.update(chain_dict)

    return final_dict
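# Usage sketch (not part of the original source). Assumes the StructProp constructor shown
# later in this section; the import path and identifiers are assumptions.
from ssbio.protein.structure.structprop import StructProp

sp = StructProp(ident='1abc', chains='A', mapped_chains='A', is_experimental=True)

# StructProp attributes are collected first; chain-level keys from chain 'A' that do
# not collide with StructProp keys are then merged in on top.
summary = sp.get_dict_with_chain(chain='A', df_format=True)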
import unittest

from ssbio.core.object import Object


class TestObject(unittest.TestCase):
    """Unit tests for Object"""

    @classmethod
    def setUpClass(cls):
        cls.ob = Object(id='idtester', description='nametester')

    def test_attr(self):
        self.assertTrue(hasattr(self.ob, 'id'))
        self.assertTrue(hasattr(self.ob, 'description'))

    def test_update(self):
        new_stuff = {'newkey': 'newvalue', 'dontadd': 'dontadd', 'description': 'newdescription'}

        # With overwrite=False, existing attributes such as description are left untouched
        self.ob.update(newdata=new_stuff, overwrite=False, only_keys=['newkey', 'description'])
        self.assertTrue(hasattr(self.ob, 'newkey'))
        self.assertEqual('nametester', self.ob.description)

        # With overwrite=True and only_keys, only the listed keys are overwritten
        self.ob.update(newdata=new_stuff, overwrite=True, only_keys=['description'])
        self.assertEqual('newdescription', self.ob.description)

        # With overwrite=True and no only_keys, everything in newdata is applied
        self.ob.update(newdata=new_stuff, overwrite=True)
        self.assertEqual('newdescription', self.ob.description)
        self.assertEqual('newvalue', self.ob.newkey)
        self.assertEqual('dontadd', self.ob.dontadd)

    def test_get_dict(self):
        gotdict = self.ob.get_dict(only_attributes='id')
        self.assertEqual(gotdict, {'id': 'idtester'})
def __init__(self, ident, description=None, chains=None, mapped_chains=None,
             is_experimental=False, structure_path=None, file_type=None):
    """Initialize a StructProp object.

    Args:
        ident (str): Unique identifier for this structure
        description (str): Optional human-readable description
        chains (str, list): Chain ID or list of IDs
        mapped_chains (str, list): A chain ID or IDs to indicate what chains should be analyzed
        is_experimental (bool): Flag to indicate if the structure is experimentally determined or a
            computational model
        structure_path (str): Path to the structure file
        file_type (str): Type of structure file - ``pdb``, ``pdb.gz``, ``mmcif``, ``cif``, ``cif.gz``,
            ``xml.gz``, ``mmtf``, ``mmtf.gz``

    """
    Object.__init__(self, id=ident, description=description)

    self.is_experimental = is_experimental

    # Chain information
    # chains is a DictList of ChainProp objects
    # If you run self.parse_structure(), all chains will be parsed and stored here
    # Use mapped_chains below to keep track of chains you are interested in
    self.chains = DictList()
    if chains:
        self.add_chain_ids(chains)
    # mapped_chains is an ordered list of mapped chain IDs which would come from BLAST or the best_structures API
    self.mapped_chains = []
    if mapped_chains:
        self.add_mapped_chain_ids(mapped_chains)

    # File information
    self.file_type = file_type
    self._structure_dir = None
    self.structure_file = None
    if structure_path:
        self.load_structure_path(structure_path, file_type)
def __init__(self, ident, description=None, chains=None, mapped_chains=None,
             is_experimental=False, structure_path=None, file_type=None):
    Object.__init__(self, id=ident, description=description)

    self.is_experimental = is_experimental
    """bool: Flag to note if this structure is an experimental model or a homology model"""

    # Chain information
    # chains is a DictList of ChainProp objects
    # If you run self.parse_structure(), all chains will be parsed and stored here
    # Use mapped_chains below to keep track of chains you are interested in
    self.chains = DictList()
    """DictList: A DictList of ChainProp objects which have their sequence stored in them, along with
    residue-specific annotations"""
    if chains:
        self.add_chain_ids(chains)
    # mapped_chains is an ordered list of mapped chain IDs which would come from BLAST or the best_structures API
    self.mapped_chains = []
    """list: A simple list of chain IDs (strings) that will be used to subset analyses"""
    if mapped_chains:
        self.add_mapped_chain_ids(mapped_chains)

    self.parsed = False
    """bool: Simple flag to track if this structure has had its structure + chain sequences parsed"""
    # XTODO: rename to sequence_parsed or something similar

    # File information
    self.file_type = file_type
    """str: Type of structure file"""
    self._structure_dir = None
    self.structure_file = None
    """str: Name of the structure file"""
    if structure_path:
        self.load_structure_path(structure_path, file_type)

    self.structure = None
    """Structure: Biopython Structure object, only used if the ``store_in_memory`` option of
    ``parse_structure`` is set to True"""
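# Usage sketch (not part of the original source). The file path is hypothetical - point
# structure_path at a real structure file on disk and set file_type to match it.
from ssbio.protein.structure.structprop import StructProp

sp = StructProp(ident='1abc',
                description='Example experimental structure',
                mapped_chains=['A'],
                is_experimental=True,
                structure_path='/path/to/1abc.pdb',
                file_type='pdb')
print(sp.structure_file, sp.mapped_chains)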
def __init__(self, ident, subunits, description=None, root_dir=None, pdb_file_type='mmtf'):
    Object.__init__(self, id=ident, description=description)

    self.subunit_dict = subunits
    """dict: Subunit composition defined as ``{protein_subunit_id: number_of_times_used_in_complex}``"""

    self._subunits = None
    self._oligomeric_state = None

    self.pdb_file_type = pdb_file_type
    """str: ``pdb``, ``pdb.gz``, ``mmcif``, ``cif``, ``cif.gz``, ``xml.gz``, ``mmtf``, ``mmtf.gz`` - choose a
    file type for files downloaded from the PDB"""

    # Create directories
    self._root_dir = None
    if root_dir:
        self.root_dir = root_dir
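# Usage sketch (not part of the original source). Assumes this constructor belongs to a
# Complex class in ssbio; the import path, subunit IDs, and directory are assumptions.
from ssbio.core.complex import Complex

# An A2B complex: two copies of subunit 'b0001' and one copy of 'b0002'
cplx = Complex(ident='my_complex',
               subunits={'b0001': 2, 'b0002': 1},
               pdb_file_type='mmtf',
               root_dir='/path/to/complexes')  # must be an existing directory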
def save_json(self, outfile, compression=False):
    Object.save_json(self, outfile=outfile, compression=compression)
def __json_encode__(self):
    # Delegate to the base implementation and return its result so JSON encoding
    # of subclasses still produces the expected dictionary
    return Object.__json_encode__(self)
def save_pickle(self, outfile, protocol=2):
    Object.save_pickle(self, outfile=outfile, protocol=protocol)

def save_dataframes(self, outdir, prefix='df_'):
    Object.save_dataframes(self, outdir=outdir, prefix=prefix)

def update(self, newdata, overwrite=False, only_keys=None):
    Object.update(self, newdata=newdata, overwrite=overwrite, only_keys=only_keys)

def __repr__(self):
    return Object.__repr__(self)

def __str__(self):
    return Object.__str__(self)