def predict(self, peptides_df):
    """
    Determine whether each peptide is immunogenic by checking

    1) that the epitope binds strongly to a particular MHC allele
       (its IC50 value is at most `self.binding_threshold`)
    2) that the "core" of the peptide (positions `self.first_position`
       through `self.last_position`, 3-8 by default) is not present in
       the self/thymic MHC ligand set loaded for that HLA allele

    Parameters
    --------
    peptides_df : DataFrame
        Must have "Epitope" and "Allele" columns plus the column named by
        IC50_FIELD_NAME. It is modified in place.

    Returns the same DataFrame with two extra columns:
    - the column named by THYMIC_DELETION_FIELD_NAME:
        Was this epitope deleted during thymic selection
        (and thus can't be recognized by T-cells)?
    - "Immunogenic":
        Is this epitope a sufficiently strong binder that wasn't deleted
        during thymic selection?
    """
    # positions in the epitope are indexed starting from 1 to
    # match immunology nomenclature, so shift the start to 0-based;
    # the 1-based inclusive end is already the exclusive slice end
    core_start = self.first_position - 1
    core_stop = self.last_position

    # Build the whole column in one pass and assign it once. Writing
    # through `df[col].ix[i] = ...` is a chained assignment that may
    # update a temporary copy, and `.ix` no longer exists in pandas.
    deleted_flags = []
    epitope_allele_pairs = zip(peptides_df["Epitope"], peptides_df["Allele"])
    for peptide, raw_allele in epitope_allele_pairs:
        allele = compact_hla_allele_name(raw_allele)
        ligands = self.peptide_sets.get(allele)
        if ligands is None:
            # assume a peptide is non-immunogenic when no thymic set was
            # loaded for its allele, i.e. treat it as deleted
            deleted_flags.append(True)
        else:
            deleted_flags.append(peptide[core_start:core_stop] in ligands)

    peptides_df[THYMIC_DELETION_FIELD_NAME] = deleted_flags
    # Force a boolean dtype so that `~` below is a logical NOT even when
    # the frame is empty and no dtype can be inferred from the list.
    peptides_df[THYMIC_DELETION_FIELD_NAME] = \
        peptides_df[THYMIC_DELETION_FIELD_NAME].astype(bool)

    strong_binder = peptides_df[IC50_FIELD_NAME] <= self.binding_threshold
    peptides_df["Immunogenic"] = \
        ~peptides_df[THYMIC_DELETION_FIELD_NAME] & strong_binder
    return peptides_df
def __init__(
        self,
        alleles,
        data_path = DEFAULT_PEPTIDE_DIR,
        binding_threshold = 500,
        first_position = 3,
        last_position = 8):
    """
    Load the thymic MHC peptide set for each requested HLA allele.

    Parameters
    --------
    alleles : list of strings
        HLA allele names; each is normalized with compact_hla_allele_name

    data_path : str, optional
        Directory holding one peptide file per allele (one peptide per
        line) and, optionally, a 'mappings' file translating allele
        names to file names

    binding_threshold : int, optional
        Stored as `self.binding_threshold`; `predict` compares IC50
        values against it

    first_position : int, optional
        Start position for extracting substring of
        query peptide (indexed starting from 1)

    last_position : int, optional
        Last position for extracting substring of
        query peptide (indexed starting from 1)

    Raises
    --------
    AssertionError
        If `data_path` does not exist, or if the file mapped to a
        requested allele is not present in `data_path`
    """
    self.binding_threshold = binding_threshold
    self.first_position = first_position
    self.last_position = last_position
    self.alleles = {
        compact_hla_allele_name(allele) for allele in alleles
    }

    self.data_path = data_path
    assert exists(self.data_path), \
        "Directory with thymic peptides (%s) does not exist" % \
        self.data_path

    available_alleles = listdir(self.data_path)

    mappings_file_path = join(self.data_path, 'mappings')
    if exists(mappings_file_path):
        self.allele_mappings = \
            _load_allele_mapping_dict(mappings_file_path)
    else:
        # without a mappings file every file name is taken to be the
        # allele name it holds peptides for
        self.allele_mappings = \
            dict(zip(available_alleles, available_alleles))

    self.peptide_sets = {}

    for allele in self.alleles:
        if allele not in self.allele_mappings:
            # logging.warn is a deprecated alias that newer Python
            # versions no longer provide; logging.warning is equivalent
            logging.warning(
                "No MHC peptide set available for HLA allele %s",
                allele)
            continue

        logging.info(
            "Loading thymic MHC peptide set for HLA allele %s",
            allele)
        filename = self.allele_mappings[allele]
        assert filename in available_alleles, \
            "No MHC peptide set available for HLA allele %s (file = %s)" % \
            (allele,filename)

        with open(join(self.data_path, filename), 'r') as f:
            # one peptide per line; skip empty lines
            peptide_set = {
                line for line in f.read().split("\n") if line
            }
        self.peptide_sets[allele] = peptide_set
def __init__(self, alleles, data_path=DEFAULT_PEPTIDE_DIR,
             binding_threshold=500, first_position=3, last_position=8):
    """
    Load the thymic MHC peptide set for each requested HLA allele.

    Parameters
    --------
    alleles : list of strings
        HLA allele names; each is normalized with compact_hla_allele_name

    data_path : str, optional
        Directory holding one peptide file per allele (one peptide per
        line) and, optionally, a 'mappings' file translating allele
        names to file names

    binding_threshold : int, optional
        Stored as `self.binding_threshold`; `predict` compares IC50
        values against it

    first_position : int, optional
        Start position for extracting substring of
        query peptide (indexed starting from 1)

    last_position : int, optional
        Last position for extracting substring of
        query peptide (indexed starting from 1)

    Raises
    --------
    AssertionError
        If `data_path` does not exist, or if the file mapped to a
        requested allele is not present in `data_path`
    """
    self.binding_threshold = binding_threshold
    self.first_position = first_position
    self.last_position = last_position
    self.alleles = {compact_hla_allele_name(allele) for allele in alleles}

    self.data_path = data_path
    assert exists(self.data_path), \
        "Directory with thymic peptides (%s) does not exist" % \
        self.data_path

    available_alleles = listdir(self.data_path)

    mappings_file_path = join(self.data_path, 'mappings')
    if exists(mappings_file_path):
        self.allele_mappings = \
            _load_allele_mapping_dict(mappings_file_path)
    else:
        # without a mappings file every file name is taken to be the
        # allele name it holds peptides for
        self.allele_mappings = \
            dict(zip(available_alleles, available_alleles))

    self.peptide_sets = {}

    for allele in self.alleles:
        if allele not in self.allele_mappings:
            # logging.warn is a deprecated alias that newer Python
            # versions no longer provide; logging.warning is equivalent
            logging.warning("No MHC peptide set available for HLA allele %s",
                            allele)
            continue

        logging.info(
            "Loading thymic MHC peptide set for HLA allele %s",
            allele)
        filename = self.allele_mappings[allele]
        assert filename in available_alleles, \
            "No MHC peptide set available for HLA allele %s (file = %s)" % \
            (allele,filename)

        with open(join(self.data_path, filename), 'r') as f:
            # one peptide per line; skip empty lines
            peptide_set = {
                line for line in f.read().split("\n") if line
            }
        self.peptide_sets[allele] = peptide_set