def __init__(self, results, threshold=None, k=10, solver="glpk", verbosity=0):
    """
    :param results: Epitope prediction result object from which the epitope selection should be performed
    :type results: :class:`~Fred2.Core.Result.EpitopePredictionResult`
    :param dict(str,float) threshold: A dictionary storing the binding thresholds for each HLA
                                      :class:`~Fred2.Core.Allele.Allele` (key = allele name; value = the threshold)
    :param int k: The number of epitopes to select
    :param str solver: The solver to be used (default glpk)
    :param int verbosity: Integer defining whether additional debug prints are made (>0 => debug mode)
    """
    # check input data
    if not isinstance(results, EpitopePredictionResult):
        raise ValueError("first input parameter is not of type EpitopePredictionResult")

    _alleles = copy.deepcopy(results.columns.values.tolist())

    # test if allele probabilities are set; if not, set them uniformly.
    # If they are only partly set, infer the missing values (assuming uniformity of the missing values)
    prob = []
    no_prob = []
    for a in _alleles:
        if a.prob is None:
            no_prob.append(a)
        else:
            prob.append(a)

    if len(no_prob) > 0:
        # group by locus
        no_prob_grouped = {}
        prob_grouped = {}
        for a in no_prob:
            no_prob_grouped.setdefault(a.locus, []).append(a)
        for a in prob:
            prob_grouped.setdefault(a.locus, []).append(a)

        for g, v in no_prob_grouped.items():
            total_loc_a = len(v)
            if g in prob_grouped:
                remaining_mass = 1.0 - sum(a.prob for a in prob_grouped[g])
                for a in v:
                    a.prob = remaining_mass / total_loc_a
            else:
                for a in v:
                    a.prob = 1.0 / total_loc_a

    probs = {a.name: a.prob for a in _alleles}
    if verbosity:
        for a in _alleles:
            print(a.name, a.prob)

    # start constructing model
    self.__solver = SolverFactory(solver)
    self.__verbosity = verbosity
    self.__changed = True
    self.__alleleProb = _alleles
    self.__k = k
    self.__result = None
    self.__thresh = {} if threshold is None else threshold

    # Variable, Set and Parameter preparation
    alleles_I = {}
    variations = []
    epi_var = {}
    imm = {}
    peps = {}
    cons = {}

    # unstack multi-index df to get a normal df based on the first prediction method
    # and filter for binding epitopes
    method = results.index.values[0][1]
    res_df = results.xs(results.index.values[0][1], level="Method")
    res_df = res_df[res_df.apply(lambda x: any(x[a] > self.__thresh.get(a.name, -float("inf"))
                                               for a in res_df.columns), axis=1)]

    for tup in res_df.itertuples():
        p = tup[0]
        seq = str(p)
        peps[seq] = p
        for a, s in zip(res_df.columns, tup[1:]):
            if method in ["smm", "smmpmbec", "arb", "comblibsidney"]:
                # these methods report IC50 values; map score and threshold into 1-log50k space
                try:
                    thr = min(1., max(0.0, 1.0 - math.log(self.__thresh.get(a.name), 50000))) \
                        if a.name in self.__thresh else -float("inf")
                except:
                    thr = 0

                if s >= thr:
                    alleles_I.setdefault(a.name, set()).add(seq)
                imm[seq, a.name] = min(1., max(0.0, 1.0 - math.log(s, 50000)))
            else:
                if s > self.__thresh.get(a.name, -float("inf")):
                    alleles_I.setdefault(a.name, set()).add(seq)
                imm[seq, a.name] = s

        prots = set(pr for pr in p.get_all_proteins())
        cons[seq] = len(prots)
        for prot in prots:
            variations.append(prot.gene_id)
            epi_var.setdefault(prot.gene_id, set()).add(seq)
    self.__peptideSet = peps

    # calculate conservation (fraction of antigens an epitope occurs in)
    variations = set(variations)
    total = len(variations)
    for e, v in cons.items():
        try:
            cons[e] = v / total
        except ZeroDivisionError:
            cons[e] = 1

    model = ConcreteModel()

    # set definition
    model.Q = Set(initialize=variations)
    model.E = Set(initialize=set(peps.keys()))
    model.A = Set(initialize=list(alleles_I.keys()))
    model.E_var = Set(model.Q, initialize=lambda model, v: epi_var[v])
    model.A_I = Set(model.A, initialize=lambda model, a: alleles_I[a])

    # parameter definition
    model.k = Param(initialize=self.__k, within=PositiveIntegers, mutable=True)
    model.p = Param(model.A, initialize=lambda model, a: probs[a])
    model.c = Param(model.E, initialize=lambda model, e: cons[e], mutable=True)

    # threshold parameters
    model.i = Param(model.E, model.A, initialize=lambda model, e, a: imm[e, a])
    model.t_allele = Param(initialize=0, within=NonNegativeIntegers, mutable=True)
    model.t_var = Param(initialize=0, within=NonNegativeIntegers, mutable=True)
    model.t_c = Param(initialize=0.0, within=NonNegativeReals, mutable=True)

    # Variable definition
    model.x = Var(model.E, within=Binary)
    model.y = Var(model.A, within=Binary)
    model.z = Var(model.Q, within=Binary)

    # Objective definition
    model.Obj = Objective(
        rule=lambda model: sum(model.x[e] * sum(model.p[a] * model.i[e, a] for a in model.A) for e in model.E),
        sense=maximize)

    # obligatory constraint (number of selected epitopes)
    model.NofSelectedEpitopesCov = Constraint(
        rule=lambda model: sum(model.x[e] for e in model.E) <= model.k)

    # optional constraints (disabled in the basic model)
    model.IsAlleleCovConst = Constraint(
        model.A, rule=lambda model, a: sum(model.x[e] for e in model.A_I[a]) >= model.y[a])
    model.MinAlleleCovConst = Constraint(
        rule=lambda model: sum(model.y[a] for a in model.A) >= model.t_allele)
    model.IsAntigenCovConst = Constraint(
        model.Q, rule=lambda model, q: sum(model.x[e] for e in model.E_var[q]) >= model.z[q])
    model.MinAntigenCovConst = Constraint(
        rule=lambda model: sum(model.z[q] for q in model.Q) >= model.t_var)
    model.EpitopeConsConst = Constraint(
        model.E, rule=lambda model, e: (1 - model.c[e]) * model.x[e] <= 1 - model.t_c)

    # generate instance
    self.instance = model
    if self.__verbosity > 0:
        print("MODEL INSTANCE")
        self.instance.pprint()

    # deactivate the optional constraints
    self.instance.IsAlleleCovConst.deactivate()
    self.instance.MinAlleleCovConst.deactivate()
    self.instance.IsAntigenCovConst.deactivate()
    self.instance.MinAntigenCovConst.deactivate()
    self.instance.EpitopeConsConst.deactivate()
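
# Usage sketch (illustrative, not part of the original class): how the basic model built by
# the constructor above could be solved directly through Pyomo and the selection read back.
# `selector` is assumed to be an instance of the class this __init__ belongs to, constructed
# from an existing EpitopePredictionResult; the helper name is hypothetical, and the GLPK
# solver is assumed to be installed.
def _solve_basic_selection(selector):
    from pyomo.opt import SolverFactory
    from pyomo.environ import value

    instance = selector.instance
    SolverFactory("glpk").solve(instance)   # solve the ILP with GLPK
    # x[e] is binary; values close to 1 mark the selected epitopes
    return [e for e in instance.E if value(instance.x[e]) > 0.5]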
def __init__(self, results, threshold=None, dist_threshold=1.0, distance={}, expression={}, uncertainty={},
             overlap=0, k=10, k_taa=0, solver="glpk", verbosity=0, include=[]):
    """
    :param results: Epitope prediction result object from which the epitope selection should be performed
    :type results: :class:`~Fred2.Core.Result.EpitopePredictionResult`
    :param dict(str,float) threshold: A dictionary storing the binding thresholds for each HLA
                                      :class:`~Fred2.Core.Allele.Allele` (key = allele name; value = the threshold)
    :param float dist_threshold: Distance threshold: an epitope is excluded if its distance-to-self score
                                 is smaller than or equal to this threshold for any HLA allele
    :param dict((str,str),float) distance: A dictionary with key (peptide sequence, HLA name) and value the distance2self
    :param dict(str,float) expression: A dictionary with key gene ID and value the gene expression in FPKM/RPKM or TPM
    :param dict((str,str),float) uncertainty: A dictionary with key (peptide sequence, HLA name) and value the
                                              associated uncertainty of the immunogenicity prediction
    :param int k: The number of epitopes to select
    :param int k_taa: The number of TAA epitopes to select
    :param str solver: The solver to be used (default glpk)
    :param int verbosity: Integer defining whether additional debug prints are made (>0 => debug mode)
    """
    # check input data
    if not isinstance(results, EpitopePredictionResult):
        raise ValueError("first input parameter is not of type EpitopePredictionResult")

    _alleles = results.columns.values.tolist()

    # generate abundance dictionary of HLA alleles; the default is 2.0 as values will be log2 transformed
    probs = {a.name: 2.0 if a.get_metadata("abundance", only_first=True) is None
             else a.get_metadata("abundance", only_first=True) for a in _alleles}

    # start constructing model
    self.__solver = SolverFactory(solver)
    self.__verbosity = verbosity
    self.__changed = True
    self.__alleleProb = _alleles
    self.__k = k
    self.__k_taa = k_taa
    self.__result = None
    self.__thresh = {} if threshold is None else threshold
    self.__included = include
    self.overlap = overlap

    # variable, set and parameter preparation
    alleles_I = {}
    variations = []
    epi_var = {}
    imm = {}
    peps = {}
    taa = []
    var_epi = {}
    cons = {}
    for a in _alleles:
        alleles_I.setdefault(a.name, set())

    # unstack multi-index df to get a normal df based on the first prediction method
    # and filter for binding epitopes
    method = results.index.values[0][1]
    res_df = results.xs(results.index.values[0][1], level="Method")
    # if predictions are not available for peptides/alleles, replace them by 0
    res_df.fillna(0, inplace=True)
    res_df = res_df[res_df.apply(lambda x: any(x[a] > self.__thresh.get(a.name, -float("inf"))
                                               for a in res_df.columns), axis=1)]
    res_df.fillna(0, inplace=True)

    # transform scores to 1-log50k(IC50) scores if necessary
    # and generate mapping dictionaries for Set definitions
    for tup in res_df.itertuples():
        p = tup[0]
        seq = str(p)

        # skip epitopes that are too similar to self for any allele
        if any(distance.get((seq, a.name), 1.0) <= dist_threshold for a in _alleles):
            continue

        peps[seq] = p
        if p.get_metadata("taa", only_first=True):
            taa.append(seq)

        for a, s in zip(res_df.columns, tup[1:]):
            if method in ["smm", "smmpmbec", "arb", "comblibsidney"]:
                try:
                    thr = min(1., max(0.0, 1.0 - math.log(self.__thresh.get(a.name), 50000))) \
                        if a.name in self.__thresh else -float("inf")
                except:
                    thr = 0

                if s >= thr:
                    alleles_I.setdefault(a.name, set()).add(seq)
                imm[seq, a.name] = min(1., max(0.0, 1.0 - math.log(s, 50000)))
            else:
                if s > self.__thresh.get(a.name, -float("inf")):
                    alleles_I.setdefault(a.name, set()).add(seq)
                imm[seq, a.name] = s

        prots = set(pr for pr in p.get_all_proteins())
        cons[seq] = len(prots)
        for prot in prots:
            variations.append(prot.gene_id)
            epi_var.setdefault(prot.gene_id, set()).add(seq)
            var_epi.setdefault(str(seq), set()).add(prot.gene_id)
    self.__peptideSet = peps

    # calculate conservation (fraction of antigens an epitope occurs in)
    variations = set(variations)
    total = len(variations)
    for e, v in cons.items():
        try:
            cons[e] = v / total
        except ZeroDivisionError:
            cons[e] = 1

    model = ConcreteModel()

    ######################################
    #
    # MODEL DEFINITIONS
    #
    ######################################

    # set definition
    model.Q = Set(initialize=variations)
    model.E = Set(initialize=set(peps.keys()))
    model.TAA = Set(initialize=set(taa))
    model.A = Set(initialize=list(alleles_I.keys()))
    model.G = Set(model.E, initialize=lambda model, e: var_epi[e])
    model.E_var = Set(model.Q, initialize=lambda model, v: epi_var[v])
    model.A_I = Set(model.A, initialize=lambda model, a: alleles_I[a])

    if self.__included is not None:
        if len(self.__included) > k:
            raise ValueError("More epitopes to include than epitopes to select! "
                             "Either raise k or reduce epitopes to include.")
        model.Include = Set(within=model.E, initialize=self.__included)

    if overlap > 0:
        def longest_common_substring(model):
            result = []
            for s1, s2 in itr.combinations(model.E, 2):
                if s1 != s2:
                    if s1 in s2 or s2 in s1:
                        result.append((s1, s2))
                    m = [[0] * (1 + len(s2)) for i in range(1 + len(s1))]
                    longest, x_longest = 0, 0
                    for x in range(1, 1 + len(s1)):
                        for y in range(1, 1 + len(s2)):
                            if s1[x - 1] == s2[y - 1]:
                                m[x][y] = m[x - 1][y - 1] + 1
                                if m[x][y] > longest:
                                    longest = m[x][y]
                                    x_longest = x
                            else:
                                m[x][y] = 0
                    if len(s1[x_longest - longest:x_longest]) >= overlap:
                        result.append((s1, s2))
            return set(result)

        model.O = Set(dimen=2, initialize=longest_common_substring)

    # parameter definition
    model.k = Param(initialize=self.__k, within=PositiveIntegers, mutable=True)
    model.k_taa = Param(initialize=self.__k_taa, within=NonNegativeIntegers, mutable=True)
    model.p = Param(model.A, initialize=lambda model, a: max(0, math.log(probs[a] + 0.001, 2)))
    model.c = Param(model.E, initialize=lambda model, e: cons[e], mutable=True)
    model.sigma = Param(model.E, model.A, initialize=lambda model, e, a: uncertainty.get((e, a), 0))
    model.i = Param(model.E, model.A, initialize=lambda model, e, a: imm[e, a])
    model.t_allele = Param(initialize=0, within=NonNegativeIntegers, mutable=True)
    model.t_var = Param(initialize=0, within=NonNegativeIntegers, mutable=True)
    model.t_c = Param(initialize=0.0, within=NonNegativeReals, mutable=True)
    model.abd = Param(model.Q, initialize=lambda model, g: max(0, math.log(expression.get(g, 2) + 0.001, 2)))
    model.eps1 = Param(initialize=1e6, mutable=True)
    model.eps2 = Param(initialize=1e6, mutable=True)

    # variable definition
    model.x = Var(model.E, within=Binary)
    model.y = Var(model.A, within=Binary)
    model.z = Var(model.Q, within=Binary)

    # objective definition
    model.Obj1 = Objective(
        rule=lambda model: -sum(model.x[e] * sum(model.abd[g] for g in model.G[e])
                                * sum(model.p[a] * model.i[e, a] for a in model.A) for e in model.E),
        sense=minimize)
    model.Obj2 = Objective(
        rule=lambda model: sum(model.x[e] * sum(model.sigma[e, a] for a in model.A) for e in model.E),
        sense=minimize)

    # constraints
    # obligatory constraint (number of selected epitopes)
    model.NofSelectedEpitopesCov1 = Constraint(rule=lambda model: sum(model.x[e] for e in model.E) >= model.k)
    model.NofSelectedEpitopesCov2 = Constraint(rule=lambda model: sum(model.x[e] for e in model.E) <= model.k)
    model.NofSelectedTAACov = Constraint(rule=lambda model: sum(model.x[e] for e in model.TAA) <= model.k_taa)

    # optional constraints (disabled in the basic model)
    model.IsAlleleCovConst = Constraint(
        model.A, rule=lambda model, a: sum(model.x[e] for e in model.A_I[a]) >= model.y[a])
    model.MinAlleleCovConst = Constraint(
        rule=lambda model: sum(model.y[a] for a in model.A) >= model.t_allele)
    model.IsAntigenCovConst = Constraint(
        model.Q, rule=lambda model, q: sum(model.x[e] for e in model.E_var[q]) >= model.z[q])
    model.MinAntigenCovConst = Constraint(
        rule=lambda model: sum(model.z[q] for q in model.Q) >= model.t_var)
    model.EpitopeConsConst = Constraint(
        model.E, rule=lambda model, e: (1 - model.c[e]) * model.x[e] <= 1 - model.t_c)

    if overlap > 0:
        model.OverlappingConstraint = Constraint(
            model.O, rule=lambda model, e1, e2: model.x[e1] + model.x[e2] <= 1)

    # constraints for Pareto optimization
    model.ImmConst = Constraint(
        rule=lambda model: sum(model.x[e] * sum(model.abd[g] for g in model.G[e])
                               * sum(model.p[a] * model.i[e, a] for a in model.A)
                               for e in model.E) <= model.eps1)
    model.UncertaintyConst = Constraint(
        rule=lambda model: sum(model.x[e] * sum(model.sigma[e, a] for a in model.A)
                               for e in model.E) <= model.eps2)

    self.__objectives = [model.Obj1, model.Obj2]
    self.__constraints = [model.UncertaintyConst, model.ImmConst]
    self.__epsilons = [model.eps2, model.eps1]

    # include constraint
    model.IncludeEpitopeConstraint = Constraint(model.Include, rule=lambda model, e: model.x[e] >= 1)

    # generate instance
    self.instance = model
    if self.__verbosity > 0:
        print("MODEL INSTANCE")
        self.instance.pprint()

    # deactivate the second objective and the optional constraints
    self.instance.Obj2.deactivate()
    self.instance.ImmConst.deactivate()
    self.instance.UncertaintyConst.deactivate()
    self.instance.IsAlleleCovConst.deactivate()
    self.instance.MinAlleleCovConst.deactivate()
    self.instance.IsAntigenCovConst.deactivate()
    self.instance.MinAntigenCovConst.deactivate()
    self.instance.EpitopeConsConst.deactivate()
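
# Usage sketch (illustrative, not part of the original class): how the extended bi-objective
# model above could be constructed from an EpitopePredictionResult plus the per-peptide
# annotation dictionaries, then solved with its default objective (abundance- and HLA-weighted
# immunogenicity, Obj1). The class name `ParetoEpitopeSelection`, the helper name, and the
# argument names are assumptions for illustration only; GLPK is assumed to be installed.
def _build_and_solve_pareto_selection(results, thresholds, distance, expression, uncertainty):
    from pyomo.opt import SolverFactory
    from pyomo.environ import value

    selector = ParetoEpitopeSelection(      # hypothetical name of the class defining this __init__
        results,
        threshold=thresholds,               # per-allele binding thresholds
        dist_threshold=1.0,                 # exclude epitopes with distance-to-self <= 1.0
        distance=distance,                  # {(peptide sequence, HLA name): distance2self}
        expression=expression,              # {gene ID: FPKM/RPKM or TPM}
        uncertainty=uncertainty,            # {(peptide sequence, HLA name): prediction uncertainty}
        k=10, k_taa=2, overlap=0)

    instance = selector.instance
    SolverFactory("glpk").solve(instance)   # Obj1 is active by default; Obj2 and the
                                            # epsilon constraints remain deactivated
    return [e for e in instance.E if value(instance.x[e]) > 0.5]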