Example #1
0
def clustering_hierarchical_modular(fm_train=traindat, merges=3):
    """Run Shogun hierarchical clustering on ``fm_train``.

    The training data is wrapped in ``RealFeatures`` and compared with
    itself using ``EuclidianDistance``; the resulting distance object
    drives a ``Hierarchical`` clusterer.

    Args:
        fm_train: Training data handed to ``RealFeatures`` (presumably a
            real-valued feature matrix -- confirm against the caller).
        merges: Value passed as the first argument of ``Hierarchical``.

    Returns:
        A tuple ``(clusterer, merge_distances, cluster_pairs)`` holding
        the trained ``Hierarchical`` object followed by the results of
        its ``get_merge_distances()`` and ``get_cluster_pairs()`` calls.
    """
    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Clustering import Hierarchical

    features = RealFeatures(fm_train)

    # The same feature object is used on both sides of the distance.
    clusterer = Hierarchical(merges, EuclidianDistance(features, features))
    clusterer.train()

    merge_distances = clusterer.get_merge_distances()
    cluster_pairs = clusterer.get_cluster_pairs()
    return clusterer, merge_distances, cluster_pairs
Example #2
0
def perform_clustering(mss_id, merges=4):
    """Hierarchically cluster the organism sequences of a multi-split set.

    The multi-split set is looked up via ``expenv.MultiSplitSet.get``. For
    each of its split sets, the sequence for ``ss.dataset.organism`` is
    fetched through ``SequencesHandler.get_seq``. The sequences are turned
    into sorted word features over the ``PROTEIN`` alphabet and clustered
    with ``Hierarchical`` using a ``HammingWordDistance``.

    Args:
        mss_id: Identifier passed to ``expenv.MultiSplitSet.get``.
        merges: Value passed as the first argument of ``Hierarchical``.
            Defaults to 4, the value that was previously hard-coded.

    Returns:
        The trained ``Hierarchical`` object.
    """
    import expenv

    mss = expenv.MultiSplitSet.get(mss_id)

    from method_mhc_mkl import SequencesHandler
    from shogun.Distance import HammingWordDistance
    from shogun.Features import StringCharFeatures, StringWordFeatures, PROTEIN
    from shogun.Clustering import Hierarchical
    from shogun.PreProc import SortWordString

    # Settings forwarded to obtain_from_char / HammingWordDistance below.
    order = 1
    gap = 0
    reverse = False
    use_sign = False

    seq_handler = SequencesHandler()
    data = [seq_handler.get_seq(ss.dataset.organism) for ss in mss.split_sets]

    # Character-level string features over the protein alphabet.
    charfeat = StringCharFeatures(PROTEIN)
    charfeat.set_features(data)

    # Word features derived from the character features, then sorted by
    # the SortWordString preprocessor.
    feats = StringWordFeatures(charfeat.get_alphabet())
    feats.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    preproc = SortWordString()
    preproc.init(feats)
    feats.add_preproc(preproc)
    feats.apply_preproc()

    distance = HammingWordDistance(feats, feats, use_sign)

    hierarchical = Hierarchical(merges, distance)
    hierarchical.train()

    # NOTE(review): the results of these two calls are discarded, as in the
    # original code. They look like leftovers; confirm the getters have no
    # side effects before removing them.
    hierarchical.get_merge_distances()
    hierarchical.get_cluster_pairs()

    return hierarchical