def __init__(self, ident, pdb_parent, seq_record=None, description=None):
    """Set up a chain record that belongs to a parent PDB entry.

    Args:
        ident: Identifier of the chain; forwarded to ``Object.__init__`` as ``id``
        pdb_parent: Parent PDB this chain comes from; stored unchanged on ``self.pdb_parent``
        seq_record: Optional sequence record stored unchanged on ``self.seq_record``
        description: Optional description forwarded to ``Object.__init__``; if the
            resulting ``self.description`` is falsy, a default one is generated

    """
    Object.__init__(self, id=ident, description=description)

    self.pdb_parent = pdb_parent
    self.seq_record = seq_record

    # A description is already in place - nothing more to do
    if self.description:
        return

    # Otherwise fall back to a description built from the chain ID and its parent
    fallback_description = 'Chain {} from PDB parent {}'.format(self.id, self.pdb_parent)
    self.description = fallback_description
def __init__(self, atlas_name, root_dir, reference_gempro, reference_genome_path=None, description=None):
    """Prepare a GEM-PRO model for ATLAS analysis

    Args:
        atlas_name (str): Name of your ATLAS project
        root_dir (str): Path to where the folder named after ``atlas_name`` will be created.
        reference_gempro (GEMPRO): GEM-PRO model to use as the reference genome
        reference_genome_path (str): Path to reference genome FASTA file. If given, it replaces the
            ``genome_path`` of ``reference_gempro``. If omitted, an existing ``genome_path`` on
            ``reference_gempro`` is kept; if there is none either, the representative sequences of
            ``reference_gempro`` are written to a file and that file is used.
        description (str): Optional string to describe your project

    Raises:
        ValueError: If ``reference_gempro`` has no associated model and contains no genes

    """
    Object.__init__(self, id=atlas_name, description=description)

    # Create directories
    self._root_dir = None
    self.root_dir = root_dir

    self.strains = DictList()
    self.df_orthology_matrix = pd.DataFrame()
    # Mark if the orthology matrix has gene IDs (thus we need to retrieve seqs from the genome file) or if
    # it is in the orthology matrix itself
    self._orthology_matrix_has_sequences = False

    # Load the GEM-PRO (could be a model, could just be a list of genes)
    self.reference_gempro = reference_gempro

    # Decide which genome file to use. An explicitly supplied path always wins. Without one, a genome
    # path already set on the GEM-PRO is left untouched (previously it was overwritten with None), and
    # only if neither exists are all representative sequences written out and used instead.
    if reference_genome_path:
        self.reference_gempro.genome_path = reference_genome_path
    elif not self.reference_gempro.genome_path:
        self.reference_gempro.genome_path = self.reference_gempro.write_representative_sequences_file(
            outname=self.reference_gempro.id)
        # TODO: must also check if reference_genome_path gene IDs can be matched to the reference_gempro

    # Also create an attribute
    self._empty_reference_gempro = None
    if self.reference_gempro.model:
        # If there is a SBML model associated with the GEMPRO, copy that model
        self._empty_reference_gempro = GEMPRO(gem_name='Copied reference GEM-PRO',
                                              gem=self.reference_gempro.model.copy())
        # Reset the GenePro attributes
        for gene in self._empty_reference_gempro.genes:
            gene.reset_protein()
    else:
        # Otherwise, just copy the list of genes over and rename the IDs
        strain_genes = [gene.id for gene in self.reference_gempro.genes]
        if not strain_genes:
            raise ValueError('GEM-PRO has no genes, unable to run multi-strain analysis')
        self._empty_reference_gempro = GEMPRO(gem_name='Copied reference GEM-PRO', genes_list=strain_genes)
def __init__(self, ident, description=None, chains=None, mapped_chains=None, is_experimental=False,
             structure_path=None, file_type=None):
    """Create a StructProp, optionally registering chains and a structure file.

    Args:
        ident (str): Unique identifier for this structure
        description (str): Optional human-readable description
        chains (str, list): Chain ID or list of IDs; registered through ``add_chain_ids`` when given
        mapped_chains (str, list): Chain ID or IDs marking the chains that should be analyzed;
            registered through ``add_mapped_chain_ids`` when given
        is_experimental (bool): Flag to indicate if structure is an experimental or computational model
        structure_path (str): Path to structure file; loaded through ``load_structure_path`` when given
        file_type (str): Type of structure file - ``pdb``, ``pdb.gz``, ``mmcif``, ``cif``, ``cif.gz``,
            ``xml.gz``, ``mmtf``, ``mmtf.gz``

    """
    Object.__init__(self, id=ident, description=description)

    self.is_experimental = is_experimental

    # --- Chains ---
    # self.chains holds ChainProp objects in a DictList. Running self.parse_structure() parses every
    # chain and stores it here; the chains of actual interest are tracked separately in mapped_chains.
    self.chains = DictList()
    if chains:
        self.add_chain_ids(chains)

    # Ordered list of mapped chain IDs, e.g. coming from BLAST or the best_structures API
    self.mapped_chains = []
    if mapped_chains:
        self.add_mapped_chain_ids(mapped_chains)

    # --- Structure file ---
    self.file_type = file_type
    self._structure_dir = self.structure_file = None

    # No file supplied: leave the file attributes empty
    if not structure_path:
        return
    self.load_structure_path(structure_path, file_type)
def __init__(self, ident, description=None, chains=None, mapped_chains=None, is_experimental=False, structure_path=None, file_type=None):
    """Initialize the structure record and, optionally, its chain and file information.

    Args:
        ident: Identifier for this structure; passed to ``Object.__init__`` as ``id``
        description: Optional description; passed to ``Object.__init__``
        chains: Optional chain ID(s); when truthy they are handed to ``add_chain_ids``
        mapped_chains: Optional chain ID(s) used to subset analyses; when truthy they are handed to
            ``add_mapped_chain_ids``
        is_experimental (bool): Flag noting whether this is an experimental model or a homology model
        structure_path: Optional path to a structure file; when truthy it is passed together with
            ``file_type`` to ``load_structure_path``
        file_type (str): Type of structure file

    """
    Object.__init__(self, id=ident, description=description)

    self.is_experimental = is_experimental
    """bool: Flag to note if this structure is an experimental model or a homology model"""

    # Chain information
    # chains is a DictList of ChainProp objects
    # If you run self.parse_structure(), all chains will be parsed and stored here
    # Use mapped_chains below to keep track of chains you are interested in
    # NOTE(review): the attribute docstring below ends mid-sentence ("along with residue-specific") -
    # the remainder of the description appears to be missing
    self.chains = DictList()
    """DictList: A DictList of chains have their sequence stored in them, along with residue-specific"""
    if chains:
        self.add_chain_ids(chains)

    # mapped_chains is an ordered list of mapped chain IDs which would come from BLAST or the best_structures API
    self.mapped_chains = []
    """list: A simple list of chain IDs (strings) that will be used to subset analyses"""
    if mapped_chains:
        self.add_mapped_chain_ids(mapped_chains)

    # Starts out False; nothing in this constructor parses the structure
    self.parsed = False
    """bool: Simple flag to track if this structure has had its structure + chain sequences parsed"""
    # XTODO: rename to sequence_parsed or something similar

    # File information
    self.file_type = file_type
    """str: Type of structure file"""
    self._structure_dir = None
    self.structure_file = None
    """str: Name of the structure file"""
    # Only load file information when a path was actually supplied
    if structure_path:
        self.load_structure_path(structure_path, file_type)

    # NOTE(review): this is assigned after load_structure_path runs, so anything that call might store
    # on self.structure would be reset here - confirm that is intended
    self.structure = None
    """Structure: Biopython Structure object, only used if ``store_in_memory`` option of ``parse_structure`` is set to True"""
def __init__(self, ident, subunits, description=None, root_dir=None, pdb_file_type='mmtf'):
    """Set up a complex from its subunit composition.

    Args:
        ident: Identifier of the complex; forwarded to ``Object.__init__`` as ``id``
        subunits (dict): Subunit composition, stored unchanged on ``self.subunit_dict``
        description: Optional description forwarded to ``Object.__init__``
        root_dir: Optional root directory; assigned to ``self.root_dir`` only when truthy
        pdb_file_type (str): File type for files downloaded from the PDB, ``mmtf`` by default

    """
    Object.__init__(self, id=ident, description=description)

    self.subunit_dict = subunits
    """dict: Subunit composition defined as ``{protein_subunit_id: number_of_times_used_in_complex}``"""

    # Private slots start out empty; nothing in this constructor fills them
    self._subunits = self._oligomeric_state = None

    self.pdb_file_type = pdb_file_type
    """str: ``pdb``, ``pdb.gz``, ``mmcif``, ``cif``, ``cif.gz``, ``xml.gz``, ``mmtf``, ``mmtf.gz`` - choose a file type for files downloaded from the PDB"""

    # Create directories
    self._root_dir = None
    if not root_dir:
        # No root directory supplied - leave it unset
        return
    self.root_dir = root_dir