Exemple #1
0
def main(argv):
    """Run naive Bayes cross-validation on one category, once per word filter.

    Args:
        argv: Exactly three command-line arguments (program name excluded):
            the category, the sample quantity and the number of folds.
            The last two are converted with ``int``.

    Exits with status 2 when the argument count is wrong or when
    ``load_samples`` raises for the requested category and quantity.
    """
    if len(argv) != 3:
        print('usage: python run_bayes <category> <quantity> <nfolds>')
        sys.exit(2)
    category = argv[0]
    quantity = int(argv[1])
    n_folds = int(argv[2])

    # Every print uses the single-argument call form, which produces the same
    # output whether ``print`` is a statement or a function.
    print("Category: '%s'\n" % category)
    filters = [
        single_words, stopword_filtered_words, bigrams,
        stopword_filtered_bigrams
    ]

    for filt in filters:
        # Two spaces after the colon: the literal's own trailing space plus the
        # separator the original comma-style print inserted.
        print("Filter:  %s" % filt.__name__)
        print("%s-fold stratified cross-validation on %s samples" % (
            n_folds, quantity * 2))

        try:
            pos_words, neg_words = load_samples(category, quantity, filt)
        except Exception:
            # NOTE(review): any failure inside load_samples is reported as
            # missing data; narrow this once its exception types are known.
            print("The data for this category and quantity are not found.")
            sys.exit(2)

        # The train/test sets returned by naive_bayes are not used here.
        accuracy, classifier, _train_set, _test_set = naive_bayes(
            pos_words, neg_words, n_folds)
        print("accuracy: %s\n" % accuracy)
        classifier.show_most_informative_features()
        print("\n")
Exemple #2
0
def main(argv):
    """Evaluate a naive Bayes classifier on one category with each word filter.

    Args:
        argv: Three command-line arguments without the program name:
            ``<category> <quantity> <nfolds>``. Quantity and fold count are
            parsed with ``int``.

    The process exits with status 2 if the number of arguments is not three
    or if ``load_samples`` raises for the given category and quantity.
    """
    if len(argv) != 3:
        print('usage: python run_bayes <category> <quantity> <nfolds>')
        sys.exit(2)
    category = argv[0]
    quantity = int(argv[1])
    n_folds = int(argv[2])

    # Single-argument print calls behave identically as a Python 2 statement
    # and as a Python 3 function, so the whole function uses that form.
    print("Category: '%s'\n" % category)
    filters = [single_words, stopword_filtered_words, bigrams, stopword_filtered_bigrams]

    for filt in filters:
        # The double space matches what the comma-separated print statement
        # emitted ("Filter: " followed by a separator space).
        print("Filter:  %s" % filt.__name__)
        print("%s-fold stratified cross-validation on %s samples" % (n_folds, quantity * 2))

        try:
            pos_words, neg_words = load_samples(category, quantity, filt)
        except Exception:
            # NOTE(review): this also hides unrelated errors raised inside
            # load_samples; consider catching a narrower exception type.
            print("The data for this category and quantity are not found.")
            sys.exit(2)

        # naive_bayes also returns the train and test sets; only the accuracy
        # and the classifier are reported.
        accuracy, classifier, _train_set, _test_set = naive_bayes(pos_words, neg_words, n_folds)
        print("accuracy: %s\n" % accuracy)
        classifier.show_most_informative_features()
        print("\n")
def make_histos_for_syst(var, main_syst, sub_systs, cuts, cuts_antiiso, outdir, indir, channel, coupling, binning=None, plot_range=None, asymmetry=None, mtmetcut=None):
    """Write the histograms of ``var`` for one systematic and its variations.

    The sample set is chosen from the first key of ``sub_systs``: for an
    "up"/"down" key the set is either the value stored under that key (for the
    "Res", "En" and "UnclusteredEn" systematics) or ``main_syst__<key>``;
    otherwise it is ``main_syst`` itself. Each (sample group, sample,
    variation) combination is then handed to ``write_histogram``.

    Args:
        var: Variable name; it prefixes every histogram name.
        main_syst: Name of the systematic being processed.
        sub_systs: Non-empty dict of variation name -> weight string, or, for
            the nominal systematic, variation name -> dict of weight strings.
        cuts, cuts_antiiso, outdir, channel, coupling: Forwarded unchanged to
            ``write_histogram``.
        indir: Input directory handed to ``load_samples``.
        binning, plot_range, asymmetry, mtmetcut: Optional settings forwarded
            unchanged to ``write_histogram``.

    Raises:
        IndexError: If ``sub_systs`` is empty.
    """
    shape_systs = ["Res", "En", "UnclusteredEn"]

    # Look the first key up once; list() keeps the indexing valid when
    # dict.keys() returns a view instead of a list.
    first_key = list(sub_systs.keys())[0]
    if first_key in ["up", "down"] and main_syst in shape_systs:
        ss_type = sub_systs[first_key]
    elif first_key in ["up", "down"]:
        ss_type = main_syst + "__" + first_key
    else:
        ss_type = main_syst
    (samples, sampnames) = load_samples(ss_type, channel, indir, coupling)

    def _write(hname, weight_str, sn, sampn):
        # Single place for the arguments shared by every histogram written below.
        write_histogram(var, hname, weight_str, samples, sn, sampn, cuts, cuts_antiiso, outdir, channel, coupling, binning=binning, plot_range=plot_range, asymmetry=asymmetry, mtmetcut=mtmetcut)

    for sn, samps in sampnames:
        for sampn in samps:
            for syst, syst_types in sub_systs.items():
                if syst == "nominal":
                    # The nominal entry carries the weight string directly.
                    _write("%s__%s" % (var, sn), syst_types, sn, sampn)
                elif sn == "DATA":
                    # No systematics if data
                    continue
                elif main_syst in shape_systs:
                    if coupling != "powheg":  # these systs not available for comphep (currently?)
                        continue
                    _write("%s__%s__%s__%s" % (var, sn, main_syst, syst), Weights.total_weight(channel), sn, sampn)
                elif main_syst == "nominal":
                    # One histogram per named weight variation of this systematic.
                    for st_name, st in syst_types.items():
                        _write("%s__%s__%s__%s" % (var, sn, syst, st_name), st, sn, sampn)
                else:  # main_syst=="partial"
                    _write("%s__%s__%s" % (var, sn, ss_type), Weights.total_weight(channel), sn, sampn)
def main(argv):
    """Classify the reviews of one category with SentiWordNet and print the accuracy.

    Args:
        argv: Exactly two command-line arguments (program name excluded):
            the category and the sample quantity (converted with ``int``).

    Exits with status 2 when the argument count is wrong or when
    ``load_samples`` raises for the requested category and quantity.
    """
    if len(argv) != 2:
        print('usage: python sentiwordnet <category> <quantity>')
        sys.exit(2)
    category = argv[0]
    quantity = int(argv[1])

    # Single-argument print calls give the same output whether ``print`` is a
    # statement or a function.
    print("Category: '%s'" % category)
    print("SentiWordNet classification on %s samples\n" % (quantity * 2))

    try:
        pos_reviews, neg_reviews = load_samples(category, quantity, whole_text)
    except Exception:
        # NOTE(review): any failure inside load_samples is reported as missing
        # data; narrow this once its exception types are known.
        print("The data for this category and quantity are not found.")
        sys.exit(2)

    reviews = pos_reviews + neg_reviews

    # Each review is indexed as (text, true label): item 0 is classified and
    # the prediction is compared with item 1.
    n_correct = sum(
        1 for review in reviews
        if sentiwordnet_classify(review[0]) == review[1])
    accuracy = n_correct / float(len(reviews))

    print("Accuracy: %s\n" % accuracy)
    print("\n")
Exemple #5
0
def save_data(file_path):
    """Download each sample's baseline and balloon-inflation ECG and save both as .npy.

    For the sample at position ``n`` the full baseline signal is written to
    ``file_path + '0\\' + <n, two digits> + '.npy'`` and the selected part of
    the balloon-inflation signal to ``file_path + '1\\' + ...``.

    Args:
        file_path: Prefix the output paths are concatenated onto. It is used
            as-is, so it has to end with a path separator, and the class
            sub-directory separator is a hard-coded backslash.
    """
    samples = load_samples()
    total = len(samples)

    for patient, s in enumerate(samples):
        record_prefix = str(s[0]).zfill(3)
        # set the filename for the baseline ECG
        baseline_record = record_prefix + 'a'
        # and the balloon inflation ECG (referred to elsewhere as the ischaemic ECG)
        balloon_record = record_prefix + s[4]
        # the first 60 seconds of balloon inflation were discarded
        balloon_start = int(s[1]) + 60
        balloon_seconds = int(s[2]) - 60

        # write baseline record:
        record = wfdb.rdrecord(baseline_record, pb_dir='staffiii/data/')
        filename = file_path + '0\\' + str(patient).zfill(2) + '.npy'
        np.save(filename, record.p_signal)

        # write balloon record:
        record = wfdb.rdrecord(balloon_record, pb_dir='staffiii/data/')
        ecg = record.p_signal
        filename = file_path + '1\\' + str(patient).zfill(2) + '.npy'
        # seconds are turned into sample indices with a factor of 1000
        # (presumably a 1000 Hz sampling rate -- TODO confirm)
        first_index = balloon_start * 1000
        np.save(filename, ecg[first_index:first_index + (balloon_seconds * 1000)])

        print('Written ' + str(patient + 1) + ' patient files of ' +
              str(total))
def evaluate_model_on_examples(threshold):
    """Threshold the saved predictions and write a per-ECG results CSV.

    Reads ``y_pred.npy``, which is indexed with two consecutive entries per
    sample: the baseline ECG at ``2*i`` and the balloon-inflation ECG at
    ``2*i + 1``. A prediction counts as 1 when it is strictly greater than
    ``threshold``. The table is written to ``db_results.csv`` with the columns
    ECG, Prediction, Truth, Correct.

    Args:
        threshold: Decision threshold applied to every entry of ``y_pred``.
    """
    samples = load_samples()
    y_pred = np.load('y_pred.npy')

    rows = ['ECG,Prediction,Truth,Correct']

    for i, s in enumerate(samples):
        record_prefix = str(s[0]).zfill(3)

        # predict the baseline ECG (truth is always 0)
        y_non_ischaemic = int((y_pred[i * 2] > threshold) * 1)
        correct = '1' if y_non_ischaemic == 0 else '0'
        rows.append(record_prefix + 'a,' + str(y_non_ischaemic) + ',0,' + correct)

        # predict the balloon inflation ECG (truth is always 1)
        y_ischaemic = int((y_pred[(i * 2) + 1] > threshold) * 1)
        correct = '1' if y_ischaemic == 1 else '0'
        rows.append(record_prefix + s[4] + ',' + str(y_ischaemic) + ',1,' + correct)

    # write results to file; the context manager closes it even if the write fails
    with open('db_results.csv', 'w') as f:
        f.write('\n'.join(rows))
Exemple #7
0
def evaluate_model_on_examples(threshold=0.5):
    """Predict every sample's two ECGs, write a results CSV and save the raw scores.

    For each sample the baseline ECG (truth 0) and the balloon-inflation ECG
    (truth 1) are scored with ``predict_ecg``. A score counts as 1 when it is
    strictly greater than ``threshold``. The per-ECG table is written to
    ``db_results.csv``; the raw scores and truths are saved to ``y_pred.npy``
    and ``y_true.npy`` with two rows per sample.

    Args:
        threshold: Decision threshold applied to each score (default 0.5).
    """
    samples = load_samples()

    model = load_model('model_0.h5')
    next_model_number = 1

    # two rows per sample (baseline + balloon inflation), sized from the data
    # rather than a fixed count
    n_rows = len(samples) * 2
    y_pred = np.zeros((n_rows, 1))
    y_true = np.zeros((n_rows, 1))

    rows = ['ECG,Prediction,Truth,Correct']

    for i, s in enumerate(samples):
        # change models after each hold-out set
        # this ensures each model is evaluated on patients whose data it has never encountered
        if i % 15 == 0 and next_model_number < 5 and i != 0:
            model = load_model('model_' + str(next_model_number) + '.h5')
            print('Switching to model ' + str(next_model_number) + ' at patient ' + str(i))
            next_model_number += 1

        record_prefix = str(s[0]).zfill(3)

        # predict baseline ECG
        score = predict_ecg(record_prefix + 'a', model, 0)
        y_pred[2 * i] = score
        y_true[2 * i] = 0
        y_non_ischaemic = int((score > threshold) * 1)
        correct = '1' if y_non_ischaemic == 0 else '0'
        rows.append(record_prefix + 'a,' + str(y_non_ischaemic) + ',0,' + correct)

        # predict balloon inflation ECG, starting 60 seconds after the start
        # time stored in the sample
        score = predict_ecg(record_prefix + s[4], model, int(s[1]) + 60)
        y_pred[2 * i + 1] = score
        y_true[2 * i + 1] = 1
        y_ischaemic = int((score > threshold) * 1)
        correct = '1' if y_ischaemic == 1 else '0'
        rows.append(record_prefix + s[4] + ',' + str(y_ischaemic) + ',1,' + correct)

    # write results to file; the context manager closes it even if the write fails
    with open('db_results.csv', 'w') as f:
        f.write('\n'.join(rows))

    # save results as arrays for threshold search
    np.save('y_true.npy', y_true)
    np.save('y_pred.npy', y_pred)
Exemple #8
0
    ppv = tp / (tp + fp)

    results = 'Sensitivity: ' + str(sens) + ' Specificity: ' + str(
        spec) + ' PPV: ' + str(ppv)
    print(results)
    f = open('Results_' + str(model_no) + '.txt', 'w')
    f.write(results)
    f.close()


if __name__ == "__main__":
    # this script represents an initial feasibility experiment, hence only limited data are used:
    non_ischaemic_seconds = 180
    ischaemic_seconds = 30

    samples = load_samples()

    # 180 non-ischaemic seconds + 30 ischaemic seconds = 210 seconds per patient
    # data acquired using sliding windows with 100 ms lateral shifts
    # hence, 10 windows per second = 2100 windows per patient
    # data is subsequently augmented by flipping voltage of each window, so 2 x 2100 = 4200 windows per patient
    X = np.zeros((len(samples) * 4200, 9000, 1))
    Y = np.zeros((len(samples) * 4200, 1))

    X, Y = populate_X_Y(X, Y, samples, non_ischaemic_seconds,
                        ischaemic_seconds)

    for i in range(len(samples)):
        # saves all the data to local disk (no augmentation at this stage, so 2100 windows / patient)
        # each patient's 2100 windows are split into the first 1800 (180 s x 10
        # windows/s) as x_neg and the remaining 300 (30 s x 10 windows/s) as x_pos
        # NOTE(review): X is allocated with 4200 rows per patient but sliced with
        # a stride of 2100 here -- confirm how populate_X_Y lays out the rows
        x_neg = X[i * 2100:(i * 2100) + 1800, :, :]
        x_pos = X[(i * 2100) + 1800:(i + 1) * 2100:, :]
#!/usr/bin/env python2
# vim:tabstop=4:shiftwidth=4:smarttab:expandtab:softtabstop=4:autoindent:
"""
this file creates a training file for FANN based on the data created by wiiuse/capture
"""
import load_samples
import os

# NOTE(review): neither constant is referenced in the visible part of this script.
NUM_SAMPLES = 100
NUM_BASIC_MOTIONS = 4

samples = load_samples.load_samples(get_file_name_only = True, input_directory="samples/")

# Flatten the samples stored under each key into one list of points.
new_samples = dict()
for key in sorted(samples.keys()):
    new_samples[key] = [point for sample in samples[key] for point in sample]

num_samples = sum(len(points) for points in new_samples.values())  # get number of samples
# Single-argument print calls give the same output whether ``print`` is a
# statement or a function.
print("total number of samples: %d" % num_samples)

num_keys = len(samples.keys())
print(num_keys)

# One space-separated one-hot string per key, e.g. "0 1 0" for the second of
# three keys.
output_values = []
for i in range(num_keys):
    one_hot = ["0"] * num_keys
    one_hot[i] = "1"
    output_values.append(" ".join(one_hot))
#!/usr/bin/env python2
# vim:tabstop=4:shiftwidth=4:smarttab:expandtab:softtabstop=4:autoindent:
"""
this file creates a training file for FANN based on the data created by wiiuse/capture
"""
import load_samples

samples = load_samples.load_samples(input_directory="wiiuse/samples/simple/")

# Flatten the samples stored under each key into one list of points.
new_samples = dict()
for key in sorted(samples.keys()):
    new_samples[key] = [point for sample in samples[key] for point in sample]

num_samples = sum(len(points) for points in new_samples.values())  # get number of samples
# Single-argument print call: same output whether ``print`` is a statement or
# a function.
print("total number of samples: %d" % num_samples)

# One-hot target line for each key, in sorted key order.
# NOTE(review): only four targets are defined, so a fifth key would raise
# IndexError in the loop below.
output_values = ["1 0 0 0", "0 1 0 0", "0 0 1 0", "0 0 0 1"]

# The context manager closes the file even if one of the writes raises.
with open("training_file", "w") as f:
    # Header line: the number of points followed by the literal "6 4"
    # (presumably FANN's input and output counts -- confirm that every point
    # has 6 values).
    f.write("%d 6 4\n" % num_samples)

    for idx, key in enumerate(sorted(new_samples.keys())):
        for sample in new_samples[key]:
            # One line of input values, then the matching one-hot target line.
            f.write(" ".join(map(str, sample)))
            f.write("\n")
            f.write(output_values[idx])
            f.write("\n")