def write(features):
    """Score every pair from the two test sequence files and dump them to test.txt.

    Each line of ``features`` is split on single spaces; the first two fields
    joined with "/" form the sample key and the remaining fields are parsed as
    floats to form that sample's feature vector.

    The pairs are read from ``../data/test_same_sequence.txt`` and
    ``../data/test_diff_sequence.txt``. For every pair the cosine distance of
    the two referenced samples is appended (as a string) to the pair's fields,
    and the first four fields of each row are written to ``test.txt``.

    :param features: path of the features text file
    :return: None
    """
    with open(features) as feature_file:
        feature_lines = feature_file.readlines()

    # "<field0>/<field1>" -> list of float feature values
    samples = {}
    for line in feature_lines:
        fields = line.split(" ")
        samples["/".join(fields[:2])] = [float(value) for value in fields[2:]]

    with open("../data/test_diff_sequence.txt") as diff_file:
        diff_lines = diff_file.readlines()
    with open("../data/test_same_sequence.txt") as same_file:
        same_lines = same_file.readlines()

    same_rows = [line.strip("\n").split(" ") for line in same_lines]
    diff_rows = [line.strip("\n").split(" ") for line in diff_lines]

    # "Same" pairs are scored first, then "diff" pairs, so the output keeps
    # that order.
    scored_rows = []
    for row in same_rows + diff_rows:
        score = distance.cosine_distnace(samples[row[0]], samples[row[1]])
        row.append(str(score))
        scored_rows.append(row)

    with open("test.txt", "w") as out_file:
        for row in scored_rows:
            out_file.write("%s %s %s %s\n" % (row[0], row[1], row[2], row[3]))
def write_reslut(features, threshold, diff_sequence_file, _same_sequence_file):
    """Threshold the cosine distance of every listed pair and write the flags.

    Each line of ``features`` is split on single spaces; the first two fields
    joined with "/" form the sample key and the remaining fields are parsed as
    floats to form that sample's feature vector.

    For every pair in the "same" file and then every pair in the "diff" file,
    the cosine distance of the two referenced samples is compared with
    ``threshold``: "1" is appended to the row when the distance is greater
    than or equal to the threshold, "0" otherwise. The rows are written,
    space separated, to ``test.txt``.

    :param features: path of the features text file
    :param threshold: distance value at or above which a pair is flagged 1
    :param diff_sequence_file: path of the "diff" pair list; a falsy value
        falls back to ``../data/test_diff_sequence.txt``
    :param _same_sequence_file: path of the "same" pair list; a falsy value
        falls back to ``../data/test_same_sequence.txt``
    :return: None
    """
    with open(features) as f:
        feature_lines = f.readlines()

    # "<field0>/<field1>" -> list of float feature values
    samples = {}
    for line in feature_lines:
        fields = line.split(" ")
        samples["/".join(fields[:2])] = [float(x) for x in fields[2:]]

    def _read_pairs(path):
        # Strip the newline so it does not end up inside the last field, and
        # skip blank lines (e.g. a trailing empty line at the end of the file).
        with open(path) as pair_file:
            return [line.strip("\n").split(" ")
                    for line in pair_file if line.strip()]

    # Honour the caller-supplied paths; keep the historical locations as the
    # fallback so callers that passed placeholders still work.
    diff_path = diff_sequence_file or "../data/test_diff_sequence.txt"
    same_path = _same_sequence_file or "../data/test_same_sequence.txt"
    diff_pairs = _read_pairs(diff_path)
    same_pairs = _read_pairs(same_path)

    result = []
    for row in same_pairs + diff_pairs:
        d = distance.cosine_distnace(samples[row[0]], samples[row[1]])
        # The flag is stored as a string so the row can be joined below.
        row.append("1" if d >= threshold else "0")
        result.append(row)

    # The file must be opened for writing; the default mode is read-only.
    with open("test.txt", "w") as f:
        for row in result:
            f.write("%s\n" % " ".join(row))
def ordinary_predict_two_sample(source1, source2, caffemodel, deploy_file,
                                dimension=150, IMAGE_SIZE=227, gpu_mode=True,
                                LAST_LAYER_NAME="ip1"):
    """Run two samples through a Caffe net and print their cosine distance.

    Both sources are loaded with ``preprocess.readManyDicom`` into one batch of
    shape ``(2, dimension, IMAGE_SIZE, IMAGE_SIZE)``, scaled, and fed through
    the network in a single forward pass. The two rows of the
    ``LAST_LAYER_NAME`` output are compared with
    ``distance.cosine_distnace`` and the result is printed.

    :param source1: first sample, passed to ``preprocess.readManyDicom``
    :param source2: second sample, passed to ``preprocess.readManyDicom``
    :param caffemodel: trained weights file given to ``caffe.Net``
    :param deploy_file: network definition file given to ``caffe.Net``
    :param dimension: size of the second axis of the input batch
    :param IMAGE_SIZE: size of the last two axes of the input batch
    :param gpu_mode: select Caffe GPU mode when true, CPU mode otherwise
    :param LAST_LAYER_NAME: key of the forward output to read features from
    :return: None
    """
    select_mode = caffe.set_mode_gpu if gpu_mode else caffe.set_mode_cpu
    select_mode()

    net = caffe.Net(deploy_file, caffemodel, caffe.TEST)

    batch = np.zeros((2, dimension, IMAGE_SIZE, IMAGE_SIZE))
    for slot, source in enumerate((source1, source2)):
        batch[slot, :, :, :] = preprocess.readManyDicom(
            source=source, IMAGE_SIZE=IMAGE_SIZE, dimension=dimension)

    # only for test LeNet: scale the input by 0.00390625 (= 1/256)
    batch = batch * 0.00390625

    net.blobs['data'].data[...] = batch
    layer_output = net.forward()[LAST_LAYER_NAME]
    first_sample_feature = layer_output[0]
    second_sample_feature = layer_output[1]
    print(distance.cosine_distnace(first_sample_feature,
                                   second_sample_feature))
def read_file_and_output_accuracy(features_file):
    """Sweep distance thresholds and print the most accurate one.

    Each line of ``features_file`` is split on single spaces; the first two
    fields joined with "/" form the sample key and the remaining fields are
    parsed as floats to form that sample's feature vector.

    Pairs are read from ``../data/test_same_sequence.txt`` and
    ``../data/test_diff_sequence.txt``; the "diff" list is truncated to the
    length of the "same" list. For every threshold in
    ``arange(0.99999, 1.0 + 1e-11, 1e-11)`` a "same" pair counts as correct
    when its cosine distance is >= the threshold and a "diff" pair when its
    distance is < the threshold. On ties the later (larger) threshold wins.
    The file name, best threshold and best accuracy are printed.

    :param features_file: path of the features text file
    :return: None
    """
    with open(features_file) as handle:
        feature_lines = handle.readlines()

    # "<field0>/<field1>" -> list of float feature values
    samples = {}
    for line in feature_lines:
        fields = line.split(" ")
        samples["/".join(fields[:2])] = [float(value) for value in fields[2:]]

    with open("../data/test_diff_sequence.txt") as handle:
        diff_lines = handle.readlines()
    with open("../data/test_same_sequence.txt") as handle:
        same_lines = handle.readlines()

    same_pairs = [line.split(" ") for line in same_lines]
    # Keep no more "diff" pairs than there are "same" pairs.
    diff_pairs = [line.split(" ") for line in diff_lines][:len(same_pairs)]

    interval = 1e-11
    thresholds = pylab.arange(0.99999, 1.0 + interval, interval)

    same_distance = np.array([
        distance.cosine_distnace(samples[pair[0]], samples[pair[1]])
        for pair in same_pairs
    ])
    diff_distance = np.array([
        distance.cosine_distnace(samples[pair[0]], samples[pair[1]])
        for pair in diff_pairs
    ])
    total = len(same_pairs) + len(diff_pairs)

    max_accuracy = 0
    max_accuracy_threshold = 0
    for threshold in thresholds:
        correct = (np.sum(same_distance >= threshold)
                   + np.sum(diff_distance < threshold))
        current = float(correct) / total
        # ">=" so that a later threshold with equal accuracy replaces the
        # earlier one.
        if current >= max_accuracy:
            max_accuracy, max_accuracy_threshold = current, threshold

    print(features_file, max_accuracy_threshold, max_accuracy)
def plot_accuracy(features_source, sequence_source):
    """Plot accuracy against cosine-distance threshold for random sample pairs.

    Every line of ``features_source`` is split on single spaces. Field 0 is
    the value compared to decide whether two rows belong together, and fields
    2 onward are parsed as floats to form the feature vector.

    ``int(totals / 2)`` "same" pairs (equal field 0) and as many "diff" pairs
    (different field 0) are drawn at random without repeating an index pair.
    ``totals`` is not defined in this function and must be available from the
    enclosing scope. One ``MyThread`` per threshold in
    ``arange(-1.0, 1.01, 0.01)`` is started; once they have all finished, the
    values of ``accuracy.dic`` (presumably filled in by those workers) are
    read in key order and plotted, with the maximum marked.

    :param features_source: the features txt
    :param sequence_source: the sequence text (currently unused; pairs are
        generated randomly instead)
    :return: None
    """
    # Read features from features.txt.
    with open(features_source) as f:
        features = [line.strip("\n").split(" ") for line in f.readlines()]

    def _vector(row_index):
        # Feature values of one row as a float array.
        return np.array(list(map(float, features[row_index][2:])))

    # Temporarily generate the pair sequences at random.
    # Dicts are used as insertion-ordered sets of (index, index) pairs.
    _same = {}
    _diff = {}
    _same_distance = []
    _diff_distance = []
    last_index = len(features) - 1
    for i in range(int(totals / 2)):
        # Draw until an unseen pair with equal field 0 is found.
        while True:
            x1 = random.randint(0, last_index)
            x2 = random.randint(0, last_index)
            if (x1, x2) not in _same and features[x1][0] == features[x2][0]:
                _same[(x1, x2)] = ''
                break
        # Draw until an unseen pair with different field 0 is found.
        while True:
            x1 = random.randint(0, last_index)
            x2 = random.randint(0, last_index)
            if (x1, x2) not in _diff and features[x1][0] != features[x2][0]:
                _diff[(x1, x2)] = ''
                break
        print(i)

    # Get the distances.
    for pair in _same:
        s1, s2 = pair
        _same_distance.append(
            distance.cosine_distnace(_vector(s1), _vector(s2)))
        print(pair)
    for pair in _diff:
        s1, s2 = pair
        d = distance.cosine_distnace(_vector(s1), _vector(s2))
        print(pair)
        _diff_distance.append(d)
    print("get the distances complete!")

    x_values = pylab.arange(-1.0, 1.01, 0.01)
    y_values = []
    threads = [MyThread(threshold, _same_distance, _diff_distance)
               for threshold in x_values]
    for t in threads:
        t.start()
    # Wait for every worker to actually finish rather than sleeping a fixed
    # 500 seconds, which was both slow and unsafe if a worker ran longer.
    # NOTE(review): assumes MyThread is a threading.Thread subclass - confirm.
    for t in threads:
        t.join()

    results = accuracy.dic
    # Order the accuracies by their key before plotting.
    for _, value in sorted(results.items(), key=lambda item: item[0]):
        y_values.append(value)
    max_index = np.argmax(y_values)

    plt.title("threshold-accuracy curve")
    plt.xlabel("threshold")
    plt.ylabel("accuracy")
    plt.plot(x_values, y_values)
    plt.plot(x_values[max_index], y_values[max_index], '*', color='red',
             label="(%s, %s)" % (x_values[max_index], y_values[max_index]))
    plt.legend()
    plt.show()