def distance_hammingword_modular (fm_train_dna=traindna,fm_test_dna=testdna,
		fm_test_real=testdat,order=3,gap=0,reverse=False,use_sign=False):

	from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
	from shogun.Preprocessor import SortWordString
	from shogun.Distance import HammingWordDistance

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_train_dna)
	feats_train=StringWordFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	preproc=SortWordString()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_test_dna)
	feats_test=StringWordFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()

	distance=HammingWordDistance(feats_train, feats_train, use_sign)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()
	return distance,dm_train,dm_test
Beispiel #2
0
def distance_hammingword_modular(fm_train_dna=traindna,
                                 fm_test_dna=testdna,
                                 fm_test_real=testdat,
                                 order=3,
                                 gap=0,
                                 reverse=False,
                                 use_sign=False):

    from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
    from shogun.Preprocessor import SortWordString
    from shogun.Distance import HammingWordDistance

    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_train_dna)
    feats_train = StringWordFeatures(charfeat.get_alphabet())
    feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    preproc = SortWordString()
    preproc.init(feats_train)
    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()

    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_test_dna)
    feats_test = StringWordFeatures(charfeat.get_alphabet())
    feats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()

    distance = HammingWordDistance(feats_train, feats_train, use_sign)

    dm_train = distance.get_distance_matrix()
    distance.init(feats_train, feats_test)
    dm_test = distance.get_distance_matrix()
    return distance, dm_train, dm_test
Beispiel #3
0
def perform_clustering(mss_id):

    import numpy
    import expenv

    mss = expenv.MultiSplitSet.get(mss_id)

    from method_mhc_mkl import SequencesHandler
    from shogun.Distance import EuclidianDistance, HammingWordDistance
    from shogun.Features import StringCharFeatures, StringWordFeatures, PROTEIN
    from shogun.Clustering import Hierarchical
    from shogun.PreProc import SortWordString

    order = 1
    gap = 0
    reverse = False

    seq_handler = SequencesHandler()

    data = [seq_handler.get_seq(ss.dataset.organism) for ss in mss.split_sets]

    charfeat = StringCharFeatures(PROTEIN)
    charfeat.set_features(data)
    feats = StringWordFeatures(charfeat.get_alphabet())
    feats.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    preproc = SortWordString()
    preproc.init(feats)
    feats.add_preproc(preproc)
    feats.apply_preproc()

    use_sign = False

    distance = HammingWordDistance(feats, feats, use_sign)
    #distance = EuclidianDistance()

    merges = 4
    hierarchical = Hierarchical(merges, distance)
    hierarchical.train()

    hierarchical.get_merge_distances()
    hierarchical.get_cluster_pairs()

    return hierarchical