Example #1
0
    def __init__(self, ident, pdb_parent, seq_record=None, description=None):
        """Initialize a chain object that belongs to a parent PDB entry.

        Args:
            ident: Identifier of the chain, forwarded to ``Object.__init__`` as ``id``
            pdb_parent: Parent PDB this chain comes from; stored as-is and used in the default description
            seq_record: Optional sequence record for this chain; stored as-is
            description: Optional description. When it ends up empty, a default one is generated
                from the chain ID and the parent PDB.

        """
        Object.__init__(self, id=ident, description=description)

        self.pdb_parent, self.seq_record = pdb_parent, seq_record

        # A usable description is already present - leave it alone
        if self.description:
            return

        # Otherwise fall back to an auto-generated description
        self.description = 'Chain {} from PDB parent {}'.format(self.id, self.pdb_parent)
Example #2
0
    def __init__(self,
                 atlas_name,
                 root_dir,
                 reference_gempro,
                 reference_genome_path=None,
                 description=None):
        """Prepare a GEM-PRO model for ATLAS analysis

        Args:
            atlas_name (str): Name of your ATLAS project
            root_dir (str): Path to where the folder named after ``atlas_name`` will be created.
            reference_gempro (GEMPRO): GEM-PRO model to use as the reference genome
            reference_genome_path (str): Path to reference genome FASTA file. If given, it replaces any
                ``genome_path`` already set on ``reference_gempro``. If omitted, an existing
                ``reference_gempro.genome_path`` is kept, and only when neither is available are the
                representative sequences of ``reference_gempro`` written out and used instead.
            description (str): Optional string to describe your project

        Raises:
            ValueError: If ``reference_gempro`` has no associated model and contains no genes

        """
        Object.__init__(self, id=atlas_name, description=description)

        # Create directories
        # NOTE: _root_dir is initialized before root_dir is assigned - keep this order
        self._root_dir = None
        self.root_dir = root_dir

        self.strains = DictList()
        self.df_orthology_matrix = pd.DataFrame()
        # Mark if the orthology matrix has gene IDs (thus we need to retrieve seqs from the genome file) or if
        # it is in the orthology matrix itself
        self._orthology_matrix_has_sequences = False

        # Load the GEM-PRO (could be a model, could just be a list of genes)
        self.reference_gempro = reference_gempro

        # Decide which genome file to use for the reference. An explicitly passed path wins; otherwise keep
        # whatever the GEM-PRO already points to; as a last resort write all representative sequences to a file.
        # (Previously a missing ``reference_genome_path`` would overwrite an existing genome_path with None.)
        if reference_genome_path:
            self.reference_gempro.genome_path = reference_genome_path
            # TODO: must also check if reference_genome_path gene IDs can be matched to the reference_gempro
        elif not self.reference_gempro.genome_path:
            self.reference_gempro.genome_path = self.reference_gempro.write_representative_sequences_file(
                outname=self.reference_gempro.id)

        # Also keep a stripped-down copy of the reference GEM-PRO
        self._empty_reference_gempro = None
        if self.reference_gempro.model:
            # If there is a SBML model associated with the GEMPRO, copy that model
            self._empty_reference_gempro = GEMPRO(
                gem_name='Copied reference GEM-PRO',
                gem=self.reference_gempro.model.copy())
            # Reset the GenePro attributes
            for gene in self._empty_reference_gempro.genes:
                gene.reset_protein()
        else:
            # Otherwise, just copy the list of genes over and rename the IDs
            strain_genes = [gene.id for gene in self.reference_gempro.genes]
            if not strain_genes:
                raise ValueError(
                    'GEM-PRO has no genes, unable to run multi-strain analysis'
                )
            self._empty_reference_gempro = GEMPRO(
                gem_name='Copied reference GEM-PRO', genes_list=strain_genes)
Example #3
0
    def __init__(self, ident, description=None, chains=None, mapped_chains=None,
                 is_experimental=False, structure_path=None, file_type=None):
        """Set up a StructProp object, optionally registering chains and a structure file.

        Args:
            ident (str): Unique ID of this structure
            description (str): Human-readable description, optional
            chains (str, list): A single chain ID or a list of chain IDs to add
            mapped_chains (str, list): Chain ID or IDs marking which chains should be analyzed
            is_experimental (bool): True for an experimental structure, False for a computational model
            structure_path (str): Location of the structure file
            file_type (str): Format of the structure file - ``pdb``, ``pdb.gz``, ``mmcif``, ``cif``, ``cif.gz``,
                ``xml.gz``, ``mmtf``, ``mmtf.gz``

        """
        Object.__init__(self, id=ident, description=description)

        self.is_experimental = is_experimental

        # --- Chains ---
        # self.chains: DictList of ChainProp objects. Running self.parse_structure() parses every chain
        # and stores it here; mapped_chains (below) tracks only the chains of interest.
        self.chains = DictList()
        if chains:
            self.add_chain_ids(chains)

        # self.mapped_chains: ordered list of mapped chain IDs, e.g. coming from BLAST or the
        # best_structures API
        self.mapped_chains = []
        if mapped_chains:
            self.add_mapped_chain_ids(mapped_chains)

        # --- Structure file ---
        # Defaults are set first; the path is only loaded when one was provided
        self.file_type = file_type
        self._structure_dir = None
        self.structure_file = None
        if structure_path:
            self.load_structure_path(structure_path, file_type)
Example #4
0
    def __init__(self, ident, description=None, chains=None, mapped_chains=None,
                 is_experimental=False, structure_path=None, file_type=None):
        """Set up a structure object, optionally registering chains and a structure file.

        Args:
            ident: Identifier of the structure, forwarded to ``Object.__init__`` as ``id``
            description: Optional description, forwarded to ``Object.__init__``
            chains: Chain ID(s) passed to ``add_chain_ids`` when given
            mapped_chains: Chain ID(s) passed to ``add_mapped_chain_ids`` when given
            is_experimental (bool): Whether this is an experimental model or a homology model
            structure_path: Path handed to ``load_structure_path`` (together with ``file_type``) when given
            file_type (str): Type of structure file

        """
        Object.__init__(self, id=ident, description=description)

        self.is_experimental = is_experimental
        """bool: Flag to note if this structure is an experimental model or a homology model"""

        # --- Chains ---
        # self.chains holds ChainProp objects. Running self.parse_structure() parses every chain and
        # stores it here; mapped_chains (below) tracks only the chains of interest.
        # NOTE(review): the attribute docstring below is cut off mid-sentence in the original.
        self.chains = DictList()
        """DictList: A DictList of chains have their sequence stored in them, along with residue-specific"""
        if chains:
            self.add_chain_ids(chains)

        # Ordered list of mapped chain IDs, e.g. coming from BLAST or the best_structures API
        self.mapped_chains = []
        """list: A simple list of chain IDs (strings) that will be used to subset analyses"""
        if mapped_chains:
            self.add_mapped_chain_ids(mapped_chains)

        self.parsed = False
        """bool: Simple flag to track if this structure has had its structure + chain sequences parsed"""
        # XTODO: rename to sequence_parsed or something similar

        # --- Structure file ---
        # Defaults are set first; the path is only loaded when one was provided
        self.file_type = file_type
        """str: Type of structure file"""
        self._structure_dir = None
        self.structure_file = None
        """str: Name of the structure file"""
        if structure_path:
            self.load_structure_path(structure_path, file_type)

        # Starts out empty; see the attribute docstring below for when it gets used
        self.structure = None
        """Structure: Biopython Structure object, only used if ``store_in_memory`` option of ``parse_structure`` is set to True"""
Example #5
0
    def __init__(self, ident, subunits, description=None, root_dir=None, pdb_file_type='mmtf'):
        """Set up a complex object from its subunit composition.

        Args:
            ident: Identifier of the complex, forwarded to ``Object.__init__`` as ``id``
            subunits (dict): Subunit composition as ``{protein_subunit_id: number_of_times_used_in_complex}``
            description: Optional description, forwarded to ``Object.__init__``
            root_dir: Optional root directory; only assigned to ``self.root_dir`` when given
            pdb_file_type (str): File type for files downloaded from the PDB, ``mmtf`` by default

        """
        Object.__init__(self, id=ident, description=description)

        self.subunit_dict = subunits
        """dict: Subunit composition defined as ``{protein_subunit_id: number_of_times_used_in_complex}``"""

        # Private placeholders, empty until populated elsewhere
        self._subunits = self._oligomeric_state = None

        self.pdb_file_type = pdb_file_type
        """str: ``pdb``, ``pdb.gz``, ``mmcif``, ``cif``, ``cif.gz``, ``xml.gz``, ``mmtf``, ``mmtf.gz`` - choose a file 
        type for files downloaded from the PDB"""

        # Directory setup: root_dir is left unset (None) unless the caller supplied one
        self._root_dir = None
        if root_dir:
            self.root_dir = root_dir