コード例 #1
0
def _run_hmm_on_sample(dataloader, output, params, targets_list, sample):
    """Decode one sample, one targets group at a time, and write its CNV rows.

    The sample's observation sequence is consumed in consecutive windows:
    the window for each entry of ``targets_list`` is as long as that entry.
    """
    sample_id = sample['sample_id']
    observations = sample['observations']
    group_total = len(targets_list)
    window_start = 0
    for group_number, targets in enumerate(targets_list, 1):
        # The two spaces after the colon reproduce what the former pair of
        # Python 2 print statements (trailing comma + soft space) emitted.
        print('Running HMM for sample[%s]:  chr%s [%d\\%d]' %
              (sample_id, targets[0]._chr, group_number, group_total))

        window_end = window_start + len(targets)
        window = slice(window_start, window_end)

        # Model and outputCNV each receive their own slice, as before.
        model = Model(ModelParams(params, targets), observations[window])
        pathlist = model.forwardBackward_Viterbi()
        dataloader.outputCNV(output, sample_id, targets, pathlist,
                             observations[window])
        window_start = window_end


def discover(args):
    """Run the HMM over the samples of a data file and write the CNV calls.

    Reads ``args.datafile``, ``args.output``, ``args.params`` and
    ``args.sample``.  When ``args.sample`` is the empty string every sample
    returned by the data manager is processed; otherwise only samples whose
    ``sample_id`` equals ``args.sample`` are processed.  A tab-separated
    header line is written to the output file before any result rows.

    The output file and the data manager are released even when processing
    raises.  If no sample was processed, a notice is printed at the end.
    """
    datafile = args.datafile
    outputfile = args.output
    paramsfile = args.params
    sample_req = args.sample
    sample_found = False  # becomes True once any sample has been processed

    print('Loading data file...')
    dataloader = DataManager(datafile)
    try:
        params = dataloader.getParams(paramsfile)
        dataloader.skipHeadline()
        sample = dataloader.getNextSample()
        targets_list = dataloader.getTargetsList()
        # ``with`` closes the output file before the data manager is closed,
        # matching the original close order.
        with open(outputfile, 'w') as output:
            output.write(
                'SAMPLE_ID\tCNV\tFULL_INTERVAL\tINDEX\tINTERVAL\tREAD_DEPTH\n')
            while sample:
                if sample_req == '' or sample['sample_id'] == sample_req:
                    sample_found = True
                    _run_hmm_on_sample(dataloader, output, params,
                                       targets_list, sample)
                sample = dataloader.getNextSample()
    finally:
        dataloader.closeFile()

    if not sample_found:
        print('Could not find the sample_id specified.')
コード例 #2
0
ファイル: example.py プロジェクト: Ling-wei/HMM-Using-python
# Transition table keyed by hidden state; every row sums to 1.
# NOTE(review): presumably outer key = current state and inner key = next
# state -- confirm against Model.
trans_prob = {
    'rainy': {'rainy': 0.7, 'sunny': 0.3},
    'sunny': {'rainy': 0.4, 'sunny': 0.6},
}

# Emission table: outer key is a hidden state, inner key an observable
# symbol; every row sums to 1.
emit_prob = {
    'rainy': {'walk': 0.1, 'shop': 0.4, 'clean': 0.5},
    'sunny': {'walk': 0.6, 'shop': 0.3, 'clean': 0.1},
}

# Observed symbol sequence handed to the model below.
sequence = ['walk', 'shop', 'clean', 'clean', 'walk', 'walk', 'walk', 'clean']

# states, symbols and start_prob are not defined in this excerpt; they are
# presumably set earlier in the script.
model = Model(states, symbols, start_prob, trans_prob, emit_prob)

# A single-argument print(...) prints the same text on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(model.evaluate(sequence))
print(model.decode(sequence))
コード例 #3
0
    params = dataloader.getParams(paramsfile)
    dataloader.skipHeadline()
    sample = dataloader.getNextSample()
    targets_list = dataloader.getTargetsList()
    output = file(outputfile, 'w')
    while sample:
        #target_index is used to split observations sequence
        target_index_begin = 0
        target_index_end = 0
        temp = 1
        for targets in targets_list:
            print 'Running HMM for sample[' + sample['sample_id'] + ']: ',
            print 'chr' + targets[0]._chr + ' [' + str(temp) + '\\' + str(
                len(targets_list)) + ']'
            temp += 1
            target_index_end = target_index_begin + len(targets)

            modelParams = ModelParams(params, targets)
            #the 'observations' of sample is splitted
            model = Model(
                modelParams,
                sample['observations'][target_index_begin:target_index_end])
            pathlist = model.forwardBackward_Viterbi()
            dataloader.outputCNV(
                output, sample['sample_id'], targets, pathlist,
                sample['observations'][target_index_begin:target_index_end])
            target_index_begin = target_index_end
        sample = dataloader.getNextSample()

    output.close()
コード例 #4
0
# Obtain the two probability tables that are passed to Model below.
# pro, train, test_wordcount, word_count and hidden_states are not defined
# in this excerpt.
# NOTE(review): conf_prob is presumably the emission table and trans_prob the
# transition table -- confirm against pro._tran_conf_prob.
conf_prob, trans_prob = pro._tran_conf_prob(train, test_wordcount, word_count,
                                            hidden_states)
# The string literal below is never executed: it is a disabled alternative
# input made of hard-coded sample sentences converted with pro._str2words.
'''
test = [["中华人民共和国今天成立了中国人民从此站起来了"],
        ["江泽民的三个代表是中国在社会主义改革过程中的智慧结晶"],
        ["人民日报称改革开发的伟大旗帜要坚定不移动的走下去"],
        ["日理万机的周总理"],
        ["国务院今天颁发了关于农业的改革方向前进步伐"],
        ["机器学习及其翻译激起了人们极其浓厚的兴趣"],
        ["中共中央书记"]]
observations = pro._str2words(test)
'''
# `test` is only assigned inside the disabled string above, so the live value
# must come from code outside this excerpt.
observations = test

# Third argument to Model, keyed by the tags B, E, M and S.
# NOTE(review): presumably the initial state distribution (word-position
# tags begin/end/middle/single) -- confirm against Model.
phi = {'B': 0.5, 'E': 0, 'M': 0, 'S': 0.5}
# S and observation are not defined in this excerpt.
model = Model(S, observation, phi, trans_prob, conf_prob)
# Starts empty.  NOTE(review): presumably collects the decoded hidden states
# in the loop that follows -- confirm.
o_hstate = []

for obser in observations:
    '''
    Note: if a sentence is too long, the Viterbi algorithm may result in beta = 0.
    There are two solutions: one is to split the sentence into several sub-sentences, the other is to use the log function in Viterbi.
    Here we choose the first method.
    '''
    length = len(obser)
    index, sub_obser, state = 0, [], []
    while index < length:
        sub_obser.append(obser[index])
        if obser[index] == '。' or obser[index] == ',':
            sub_state = model.decode(sub_obser)
            sub_obser = []
コード例 #5
0
# Probability of each observation given a hidden state: outer key is the
# hidden state, inner key the observed symbol.  Every row sums to 1.
conf_prob = {
    'rainy': {'walk': 0.1, 'shop': 0.3, 'clean': 0.6},
    'sunny': {'walk': 0.4, 'shop': 0.5, 'clean': 0.1},
    'cloudy': {'walk': 0.6, 'shop': 0.25, 'clean': 0.15},
}

# Observed symbol sequence handed to the model below.
observations = [
    'walk', 'shop', 'clean', 'clean', 'walk', 'walk', 'walk', 'clean'
]

# Number of iterations for the EM algorithm.
iter_num = 50

# states, observation, phi and trans_prob are not defined in this excerpt;
# they are presumably set earlier in the script.
model = Model(states, observation, phi, trans_prob, conf_prob, iter_num)

# A single-argument print(...) prints the same text on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(model.evaluate(observations))
print(model.decode(observations))