def main(argv):
    """Run the naive Bayes experiment once per feature filter.

    For each filter (single words, stopword-filtered words, bigrams,
    stopword-filtered bigrams) the samples are loaded with ``load_samples``,
    handed to ``naive_bayes`` together with the fold count, and the reported
    accuracy and the classifier's most informative features are printed.

    Args:
        argv: command-line arguments *without* the program name, i.e.
            ``[category, quantity, nfolds]``.  ``quantity`` and ``nfolds``
            must be integer strings; ``quantity * 2`` is reported as the
            total sample count.

    Exits with status 2 when the argument count is wrong, when ``quantity``
    or ``nfolds`` is not an integer, or when the samples cannot be loaded.
    """
    usage = 'usage: python run_bayes <category> <quantity> <nfolds>'
    if len(argv) != 3:
        print(usage)
        sys.exit(2)

    category = argv[0]
    try:
        quantity = int(argv[1])
        n_folds = int(argv[2])
    except ValueError:
        # A malformed number is a usage error, not a reason for a traceback.
        print(usage)
        sys.exit(2)

    print("Category: '%s'\n" % category)

    filters = [
        single_words,
        stopword_filtered_words,
        bigrams,
        stopword_filtered_bigrams,
    ]
    for filt in filters:
        # Two spaces after the colon reproduce the historical output of
        # the Python 2 statement `print "Filter: ", name`.
        print("Filter:  %s" % filt.__name__)
        print("%s-fold stratified cross-validation on %s samples" % (
            n_folds, quantity * 2))

        try:
            pos_words, neg_words = load_samples(category, quantity, filt)
        except Exception as err:
            print("The data for this category and quantity are not found.")
            # Keep the underlying cause visible; the message above is only
            # a guess at what went wrong.
            sys.stderr.write("load_samples failed: %r\n" % (err,))
            sys.exit(2)

        # naive_bayes also returns the train/test sets; they are not needed here.
        accuracy, classifier, _train_set, _test_set = naive_bayes(
            pos_words, neg_words, n_folds)
        print("accuracy: %s\n" % accuracy)
        classifier.show_most_informative_features()
        print("\n")
def main(argv):
    """Run the naive Bayes experiment once per feature filter.

    For each filter (single words, stopword-filtered words, bigrams,
    stopword-filtered bigrams) the samples are loaded with ``load_samples``,
    handed to ``naive_bayes`` together with the fold count, and the reported
    accuracy and the classifier's most informative features are printed.

    Args:
        argv: command-line arguments *without* the program name, i.e.
            ``[category, quantity, nfolds]``.  ``quantity`` and ``nfolds``
            must be integer strings; ``quantity * 2`` is reported as the
            total sample count.

    Exits with status 2 when the argument count is wrong, when ``quantity``
    or ``nfolds`` is not an integer, or when the samples cannot be loaded.
    """
    usage = 'usage: python run_bayes <category> <quantity> <nfolds>'
    if len(argv) != 3:
        print(usage)
        sys.exit(2)

    category = argv[0]
    try:
        quantity = int(argv[1])
        n_folds = int(argv[2])
    except ValueError:
        # A malformed number is a usage error, not a reason for a traceback.
        print(usage)
        sys.exit(2)

    print("Category: '%s'\n" % category)

    filters = [single_words, stopword_filtered_words,
               bigrams, stopword_filtered_bigrams]
    for filt in filters:
        # Two spaces after the colon reproduce the historical output of
        # the Python 2 statement `print "Filter: ", name`.
        print("Filter:  %s" % filt.__name__)
        print("%s-fold stratified cross-validation on %s samples"
              % (n_folds, quantity * 2))

        try:
            pos_words, neg_words = load_samples(category, quantity, filt)
        except Exception as err:
            print("The data for this category and quantity are not found.")
            # Keep the underlying cause visible; the message above is only
            # a guess at what went wrong.
            sys.stderr.write("load_samples failed: %r\n" % (err,))
            sys.exit(2)

        # naive_bayes also returns the train/test sets; they are not needed here.
        accuracy, classifier, _train_set, _test_set = naive_bayes(
            pos_words, neg_words, n_folds)
        print("accuracy: %s\n" % accuracy)
        classifier.show_most_informative_features()
        print("\n")
def make_histos_for_syst(var, main_syst, sub_systs, cuts, cuts_antiiso, outdir, indir, channel, coupling, binning=None, plot_range=None, asymmetry=None, mtmetcut=None):
    """Write the histograms of ``var`` for one systematic and its variations.

    The sample set to load is chosen from ``main_syst`` and the first key of
    ``sub_systs``; then, for every (sample group, sample, sub-systematic)
    combination, ``write_histogram`` is called with a histogram name of the
    form:

    * ``var__sn``                     for the "nominal" sub-systematic,
    * ``var__sn__main_syst__syst``    for Res / En / UnclusteredEn
      (only when ``coupling == "powheg"``),
    * ``var__sn__syst__st_name``      for each weight variation when
      ``main_syst == "nominal"``,
    * ``var__sn__ss_type``            otherwise.

    Non-nominal sub-systematics are skipped for the "DATA" sample group.
    The function returns nothing; its effect is the ``write_histogram`` calls.
    """
    energy_like = ["Res", "En", "UnclusteredEn"]

    # Decide which flavour of samples to load.
    first_key = list(sub_systs.keys())[0]
    if first_key in ("up", "down"):
        if main_syst in energy_like:
            ss_type = sub_systs[first_key]
        else:
            ss_type = main_syst + "__" + first_key
    else:
        ss_type = main_syst

    samples, sampnames = load_samples(ss_type, channel, indir, coupling)

    def _write(hname, weight, group, sample_name):
        # Every call shares the same selection and plotting options.
        write_histogram(var, hname, weight, samples, group, sample_name,
                        cuts, cuts_antiiso, outdir, channel, coupling,
                        binning=binning, plot_range=plot_range,
                        asymmetry=asymmetry, mtmetcut=mtmetcut)

    for sn, samps in sampnames:
        for sampn in samps:
            for syst, syst_types in sub_systs.items():
                if syst == "nominal":
                    # Here the dict value is the weight string itself.
                    _write("%s__%s" % (var, sn), syst_types, sn, sampn)
                    continue

                if sn == "DATA":
                    # No systematics for data.
                    continue

                if main_syst in energy_like:
                    # These systematics are not available for comphep (currently?).
                    if coupling == "powheg":
                        _write("%s__%s__%s__%s" % (var, sn, main_syst, syst),
                               Weights.total_weight(channel), sn, sampn)
                    continue

                if main_syst == "nominal":
                    for st_name, st in syst_types.items():
                        _write("%s__%s__%s__%s" % (var, sn, syst, st_name),
                               st, sn, sampn)
                    continue

                # main_syst == "partial"
                _write("%s__%s__%s" % (var, sn, ss_type),
                       Weights.total_weight(channel), sn, sampn)
def main(argv):
    """Classify reviews with SentiWordNet and print the accuracy.

    Loads the positive and negative reviews for a category with
    ``load_samples`` (using the ``whole_text`` filter), classifies the first
    element of every review with ``sentiwordnet_classify`` and compares the
    result with the review's second element (its label).

    Args:
        argv: command-line arguments *without* the program name, i.e.
            ``[category, quantity]``.  ``quantity`` must be an integer
            string; ``quantity * 2`` is reported as the total sample count.

    Exits with status 2 when the argument count is wrong, when ``quantity``
    is not an integer, when the samples cannot be loaded, or when no reviews
    were loaded at all (the accuracy would be undefined).
    """
    usage = 'usage: python sentiwordnet <category> <quantity>'
    if len(argv) != 2:
        print(usage)
        sys.exit(2)

    category = argv[0]
    try:
        quantity = int(argv[1])
    except ValueError:
        # A malformed number is a usage error, not a reason for a traceback.
        print(usage)
        sys.exit(2)

    print("Category: '%s'" % category)
    print("SentiWordNet classification on %s samples\n" % (quantity * 2))

    try:
        pos_reviews, neg_reviews = load_samples(category, quantity, whole_text)
    except Exception as err:
        print("The data for this category and quantity are not found.")
        # Keep the underlying cause visible; the message above is only a
        # guess at what went wrong.
        sys.stderr.write("load_samples failed: %r\n" % (err,))
        sys.exit(2)

    reviews = pos_reviews + neg_reviews
    if not reviews:
        # Avoid a ZeroDivisionError below.
        sys.stderr.write("No reviews were loaded; accuracy is undefined.\n")
        sys.exit(2)

    n_correct = sum(
        1 for review in reviews
        if sentiwordnet_classify(review[0]) == review[1])
    accuracy = n_correct / float(len(reviews))

    print("Accuracy: %s\n" % accuracy)
    print("\n")
def save_data(file_path):
    """Save each patient's baseline and balloon-inflation ECG as .npy files.

    For every sample returned by ``load_samples()`` two records are read with
    ``wfdb.rdrecord`` from ``staffiii/data/``:

    * the baseline record ``<id>a`` -- saved whole to
      ``file_path + '0\\' + <patient>.npy``;
    * the balloon-inflation record ``<id><s[4]>`` (referred to elsewhere as
      the ischaemic ECG) -- saved to ``file_path + '1\\' + <patient>.npy``
      after the first 60 seconds of the inflation are discarded.

    ``file_path`` is used as a plain string prefix, so it must already end
    with a path separator.
    NOTE(review): the ``'\\'`` separator is Windows-specific -- confirm before
    running this elsewhere.
    """
    samples = load_samples()
    n_patients = len(samples)

    for patient in range(n_patients):
        s = samples[patient]
        record_id = str(s[0]).zfill(3)
        out_name = str(patient).zfill(2) + '.npy'

        # Baseline ECG: stored in full.
        baseline = wfdb.rdrecord(record_id + 'a', pb_dir='staffiii/data/')
        np.save(file_path + '0\\' + out_name, baseline.p_signal)

        # Balloon-inflation ECG: skip the first 60 seconds of the inflation
        # and keep the remainder of it.
        balloon = wfdb.rdrecord(record_id + s[4], pb_dir='staffiii/data/')
        balloon_start = int(s[1]) + 60
        balloon_seconds = int(s[2]) - 60
        # Seconds -> sample indices (presumably 1000 samples per second;
        # confirm against the record's sampling rate).
        first = balloon_start * 1000
        last = first + (balloon_seconds * 1000)
        np.save(file_path + '1\\' + out_name, balloon.p_signal[first:last])

        print('Written ' + str(patient + 1) + ' patient files of ' + str(n_patients))
def evaluate_model_on_examples(threshold):
    """Threshold the saved predictions and write a per-ECG results CSV.

    Reads the raw predictions from ``y_pred.npy`` (two consecutive rows per
    sample: baseline ECG first, balloon-inflation ECG second), converts each
    to 0/1 with ``prediction > threshold`` and writes ``db_results.csv`` with
    the columns ``ECG,Prediction,Truth,Correct``.  The truth is 0 for every
    baseline record (``<id>a``) and 1 for every balloon record (``<id><s[4]>``).

    ``y_true.npy`` is no longer loaded: the truth column is fixed by the
    record type, so the array was never used.

    Args:
        threshold: decision threshold applied to each raw prediction.
    """
    samples = load_samples()
    y_pred = np.load('y_pred.npy')

    rows = ['ECG,Prediction,Truth,Correct']
    for i in range(len(samples)):
        s = samples[i]
        record_id = str(s[0]).zfill(3)
        # (record name, row in y_pred, ground truth)
        cases = (
            (record_id + 'a', i * 2, 0),          # baseline ECG
            (record_id + s[4], (i * 2) + 1, 1),   # balloon inflation ECG
        )
        for record, row, truth in cases:
            predicted = int((y_pred[row] > threshold) * 1)
            correct = '1' if predicted == truth else '0'
            rows.append(record + ',' + str(predicted) + ',' + str(truth) + ',' + correct)

    # The context manager guarantees the file is flushed and closed; the
    # previous code referenced `f.close` without calling it.
    with open('db_results.csv', 'w') as f:
        f.write('\n'.join(rows))
def evaluate_model_on_examples(threshold=0.5):
    """Evaluate the hold-out models on every sample and record the results.

    Each sample contributes two predictions made with ``predict_ecg``: the
    baseline ECG (``<id>a``, truth 0) and the balloon-inflation ECG
    (``<id><s[4]>``, truth 1, passed ``int(s[1]) + 60`` as the third
    argument).  The model is switched every 15 samples (``model_1.h5`` ..
    ``model_4.h5``) so that each model is evaluated on patients whose data it
    has never encountered.

    Outputs:
        * ``db_results.csv`` -- ``ECG,Prediction,Truth,Correct`` rows using
          the thresholded predictions;
        * ``y_true.npy`` / ``y_pred.npy`` -- the labels and raw predictions,
          saved for the threshold search.  Both have ``2 * len(samples)``
          rows (previously hard-coded to 152).

    Args:
        threshold: decision threshold applied to each raw prediction.
    """
    samples = load_samples()
    n_samples = len(samples)

    # Two predictions (baseline + balloon) per sample.
    y_pred = np.zeros((n_samples * 2, 1))
    y_true = np.zeros((n_samples * 2, 1))

    model = load_model('model_0.h5')
    next_model_number = 1
    rows = ['ECG,Prediction,Truth,Correct']

    for i in range(n_samples):
        s = samples[i]

        # Change models after each hold-out set of 15 patients.
        if i % 15 == 0 and i != 0 and next_model_number < 5:
            model = load_model('model_' + str(next_model_number) + '.h5')
            print('Switching to model ' + str(next_model_number) + ' at patient ' + str(i))
            next_model_number += 1

        record_id = str(s[0]).zfill(3)
        # (record name, third argument to predict_ecg, ground truth)
        cases = (
            (record_id + 'a', 0, 0),                    # baseline ECG
            (record_id + s[4], int(s[1]) + 60, 1),      # balloon inflation ECG
        )
        for offset, (record, start, truth) in enumerate(cases):
            raw = predict_ecg(record, model, start)
            y_pred[(i * 2) + offset] = raw
            y_true[(i * 2) + offset] = truth

            predicted = int((raw > threshold) * 1)
            correct = '1' if predicted == truth else '0'
            rows.append(record + ',' + str(predicted) + ',' + str(truth) + ',' + correct)

    # The context manager guarantees the file is flushed and closed; the
    # previous code referenced `f.close` without calling it.
    with open('db_results.csv', 'w') as f:
        f.write('\n'.join(rows))

    # Save results as arrays for the threshold search.
    np.save('y_true.npy', y_true)
    np.save('y_pred.npy', y_pred)
ppv = tp / (tp + fp) results = 'Sensitivity: ' + str(sens) + ' Specificity: ' + str( spec) + ' PPV: ' + str(ppv) print(results) f = open('Results_' + str(model_no) + '.txt', 'w') f.write(results) f.close() if __name__ == "__main__": # this script represents an initial feasibility experiment, hence only limited data are used: non_ischaemic_seconds = 180 ischaemic_seconds = 30 samples = load_samples() # 180 ischaemic seconds + 30 non-ischaemic seconds = 210 seconds per patient # data acquired using sliding windows with 100mS lateral shifts # hence, 10 windows per second = 2100 windows per patient # data is subsequently augmented by flipping voltage of each window, so 2 x 2100 = 4200 windows per patient X = np.zeros((len(samples) * 4200, 9000, 1)) Y = np.zeros((len(samples) * 4200, 1)) X, Y = populate_X_Y(X, Y, samples, non_ischaemic_seconds, ischaemic_seconds) for i in range(len(samples)): # saves all the data to local disk (no augmentation at this stage, so 2100 windows / patient) x_neg = X[i * 2100:(i * 2100) + 1800, :, :] x_pos = X[(i * 2100) + 1800:(i + 1) * 2100:, :]
#!/usr/bin/env python2
# vim:tabstop=4:shiftwidth=4:smarttab:expandtab:softtabstop=4:autoindent:

"""
Prepare the data for a FANN training file from the samples recorded by
wiiuse/capture: flatten the samples of every motion and build one one-hot
output line per motion.
"""

import load_samples
import os

NUM_SAMPLES = 100
NUM_BASIC_MOTIONS = 4

samples = load_samples.load_samples(get_file_name_only = True, input_directory="samples/")

# Merge all recordings of a motion into a single flat list of points.
new_samples = dict()
for key in sorted(samples.keys()):
    joined_samples = [point for sample in samples[key] for point in sample]
    new_samples[key] = joined_samples

# Total number of points over all motions.
num_samples = sum(len(points) for points in new_samples.values())
print("total number of samples: %d" % num_samples)
print(len(samples.keys()))

# One output line per motion: a space-separated one-hot vector with the
# 1 in the motion's position.
num_classes = len(samples.keys())
output_values = []
for hot in range(num_classes):
    output_values.append(
        " ".join("1" if pos == hot else "0" for pos in range(num_classes)))
#!/usr/bin/env python2
# vim:tabstop=4:shiftwidth=4:smarttab:expandtab:softtabstop=4:autoindent:

"""
Create a training file for FANN from the data recorded by wiiuse/capture.

The file "training_file" starts with a header line
"<number of samples> 6 4" and then holds, for every sample, one line with
the sample's values followed by one line with the one-hot output of the
motion the sample belongs to.
"""

import load_samples

samples = load_samples.load_samples(input_directory="wiiuse/samples/simple/")

# Merge all recordings of a motion into a single flat list of points.
new_samples = dict()
for key in sorted(samples.keys()):
    joined_samples = []
    for sample in samples[key]:
        joined_samples.extend(sample)
    new_samples[key] = joined_samples

# Total number of points over all motions.
num_samples = sum(len(points) for points in new_samples.values())
print("total number of samples: %d" % num_samples)

# One output line per motion, in sorted-key order.
output_values = ["1 0 0 0", "0 1 0 0", "0 0 1 0", "0 0 0 1"]

# Fail before touching the output file rather than with an IndexError after
# it has been partly written.
if len(new_samples) > len(output_values):
    raise ValueError(
        "found %d motions but only %d output patterns are defined"
        % (len(new_samples), len(output_values)))

# The context manager closes the file even if a write fails part-way.
with open("training_file", "w") as f:
    f.write("%d 6 4\n" % num_samples)
    for idx, key in enumerate(sorted(new_samples.keys())):
        for sample in new_samples[key]:
            f.write(" ".join(map(str, sample)))
            f.write("\n")
            f.write(output_values[idx])
            f.write("\n")