class HMMModelCreation(ModelCreationAbstractclass):
    """Create prediction and observation models from a GHMM XML file.

    Both model builders expect the path of the HMM file under the
    ``"input"`` keyword argument and read the matrices returned by
    ``hmm.asMatrices()``.
    """

    # Shared exception instance raised whenever no input file was given.
    # Kept as a class attribute so existing references to ``ex`` still work.
    ex = Exception(
        "For the model creation from HMM, input is required. Please select a HMM file using [-i]."
    )

    def open_hmm(self, i):
        """Open the XML HMM file located at path ``i`` with GHMM.

        :param i: File name of the HMM in GHMM XML format.
        :return: The object returned by ``gh.HMMOpen``.
        """
        return gh.HMMOpen(fileName=i, filetype=gh.GHMM_FILETYPE_XML)

    def _load_matrices(self, kwargs):
        """Validate ``kwargs["input"]`` and return ``hmm.asMatrices()``.

        :param kwargs: Keyword arguments handed to the public builders.
        :raises Exception: ``self.ex`` if ``kwargs["input"]`` is ``None``.
        :raises KeyError: If ``"input"`` is missing from ``kwargs``.
        """
        source = kwargs["input"]
        # Identity check: ``None`` is a singleton, ``==`` may be overridden.
        if source is None:
            raise self.ex
        return self.open_hmm(source).asMatrices()

    def create_states(self, **kwargs):
        """Not supported for HMM input; always raises.

        :raises NotImplementedError: Always.
        """
        raise NotImplementedError(
            "State look-up table creation for HMM currently not available.")

    @return_numpy_array
    def create_prediction_model(self, **kwargs):
        """Return the first matrix of ``asMatrices()`` (named ``trans`` here).

        :param kwargs: Must contain ``"input"``, the HMM file path.
        :raises Exception: ``self.ex`` if ``kwargs["input"]`` is ``None``.
        """
        trans, _, _ = self._load_matrices(kwargs)
        return trans

    @return_numpy_array
    def create_observation_model(self, **kwargs):
        """Return the second matrix of ``asMatrices()`` (named ``emi`` here).

        :param kwargs: Must contain ``"input"``, the HMM file path.
        :raises Exception: ``self.ex`` if ``kwargs["input"]`` is ``None``.
        """
        _, emi, _ = self._load_matrices(kwargs)
        return emi
def tffm_from_xml(xml, kind):
    """
    Build a TFFM from its XML description.

    The file is opened with ``ghmm.HMMOpen`` and the resulting model's
    emission domain, distribution and underlying C model are handed to the
    :class:`TFFM` constructor together with *kind*.

    :arg xml: Path of the file holding the TFFM description in XML format.
    :type xml: str
    :arg kind: Type of TFFM to construct, either '1st-order' or 'detailed'.
    :type kind: str

    :returns: The TFFM described in the XML file.
    :rtype: :class:`TFFM`

    """
    model = ghmm.HMMOpen(xml)
    domain = model.emissionDomain
    distribution = model.distribution
    c_model = model.cmodel
    return TFFM(domain, distribution, c_model, kind)
def __create_hmm_from_xml(self, xml):
    """Creates a hmm from the xml representation.

    Not nice to use tempfile but not otherwise possible due to hidden code
    and swig in ghmm. The temporary file is removed again even when writing
    or parsing fails.

    :param xml: The xml string
    :return: the ghmm hmm object
    """
    fd, name = tempfile.mkstemp()
    try:
        # The file has to be closed before ghmm reads from it, otherwise
        # opening fails; the ``with`` block guarantees that.
        with os.fdopen(fd, 'w') as f:
            f.write(xml)
        return gh.HMMOpen(fileName=name, filetype=gh.GHMM_FILETYPE_XML)
    finally:
        # Always clean up, so a failed parse does not leak the temp file.
        os.remove(name)
def __init__(self, allele):
    '''
    Initialize the predictor. For now, only H2-IAb is supported.

    :param allele: Name of the allele; must be 'H2-IAb'.
    :raises ValueError: If any other allele is requested.
    '''
    if allele != 'H2-IAb':
        raise ValueError('Unsupported allele')
    self.allele = allele

    # range of allowed peptide lengths (percentile rank calibration was
    # done for this range)
    self.minPeptideLength = 12
    self.maxPeptideLength = 24

    # define the HMM emission alphabet, including a dedicated "stop"
    # emission (a GHMM-specific tweak)
    self.alphabet = ghmm.Alphabet(ghmm.AminoAcids.listOfCharacters + ['Z'])

    # a value to replace inf in the predictions. Same value as used in
    # percentile rank calibration
    self.infSubstitute = 1000

    # GHMM has an internal limitation of 1,500,000 sequences in a set
    self.sequencesPerBlock = 1500000

    # load the HMM model and the corresponding percentile rank calibration
    # data shipped as package resources
    from pkg_resources import resource_filename
    hmmFile = resource_filename(__name__,
                                'models/hmm-h2iab-iedb2018-binders.xml')
    prCalibrationFile = resource_filename(
        __name__, 'models/precent-rank-model-h2iab.npz')
    self.hmm = ghmm.HMMOpen(hmmFile)

    # np.load keeps the .npz archive open until it is closed; the context
    # manager releases the file handle once both arrays have been read.
    with np.load(prCalibrationFile) as prCalibration:
        self.prCalibrationCdf = prCalibration['cdf']
        self.prCalibrationBinEdges = prCalibration['bin_edges']
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Train an HMM loaded from "ta2.1.2.xml" on the sequences in "TA_all.fasta"
# with Baum-Welch and pickle the resulting model to "ta_with_noise".
import ghmm
import hmm
import predictor
import pickle

# make a dataset
dm = predictor.FastaDataSetMaker()
ta = dm.read_from_file("TA_all.fasta", name="ta", label=1)

# map each of the 20 residue letters to its index in charlist
charlist = "ACDEFGHIKLMNPQRSTVWY"
char_dic = {c: i for i, c in enumerate(charlist)}

# encode every sequence as a list of indices, read in reverse order
ta_n = [[char_dic[c] for c in seq.sequence[::-1]] for seq in ta]

# convert hmm
g = ghmm.HMMOpen("ta2.1.2.xml")
h = hmm.convert_ghmm(g)

# Training
h.baum_welch(ta_n, iter_limit=1000, threshold=-1e-3,
             pseudocounts=[0, 1e-3, 0])
g2 = hmm.convert2ghmm(h)

# pickle streams are binary data: open in "wb" and close the file reliably
with open("ta_with_noise", "wb") as f:
    pickle.dump(g2, f)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Convert a GHMM XML file into a JSON file holding the three matrices
# returned by asMatrices(), keyed by the RepHMM constants.
import argparse
import ghmm as gh
import json
from qsrrep_lib.rep_hmm import RepHMM

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: without them HMMOpen/open would receive None
    # and fail with an unhelpful error instead of a usage message.
    parser.add_argument("-i", "--input", help="The xml HMM file to convert",
                        type=str, required=True)
    parser.add_argument("-o", "--output", help="The new filename",
                        type=str, required=True)
    args = parser.parse_args()

    hmm = gh.HMMOpen(fileName=args.input, filetype=gh.GHMM_FILETYPE_XML)
    trans, emi, start = hmm.asMatrices()

    ret = {
        RepHMM.TRANS: trans,
        RepHMM.EMI: emi,
        RepHMM.START: start,
    }

    with open(args.output, 'w') as f:
        json.dump(ret, f)