Ejemplo n.º 1
0
def write(features):
    """Score every test pair and dump the scores to ``test.txt``.

    Each line of *features* is split on single spaces: the first two fields,
    joined with "/", form the sample key and the remaining fields are parsed
    as floats (the feature vector).

    The pairs come from ``../data/test_same_sequence.txt`` and
    ``../data/test_diff_sequence.txt``. Fields 0 and 1 of every pair line are
    sample keys; the value of ``distance.cosine_distnace`` for the two
    vectors is appended to the line as a string. Same-pairs are written
    first, then diff-pairs, four space-separated fields per output line.

    :param features: path of the features text file
    :return: None
    """
    with open(features) as src:
        feature_lines = src.readlines()

    # key ("<field0>/<field1>") -> feature vector as a list of floats
    samples = {}
    for line in feature_lines:
        fields = line.split(" ")
        vector = [float(value) for value in fields[2:]]
        samples["/".join(fields[:2])] = vector

    with open("../data/test_diff_sequence.txt") as src:
        diff_rows = [line.strip("\n").split(" ") for line in src.readlines()]
    with open("../data/test_same_sequence.txt") as src:
        same_rows = [line.strip("\n").split(" ") for line in src.readlines()]

    # Same-pairs are scored (and later written) before diff-pairs.
    result = []
    for row in same_rows + diff_rows:
        score = distance.cosine_distnace(samples[row[0]], samples[row[1]])
        row.append(str(score))
        result.append(row)

    with open("test.txt", "w") as out:
        for row in result:
            first, second, third, fourth = row[0], row[1], row[2], row[3]
            out.write("%s %s %s %s\n" % (first, second, third, fourth))
Ejemplo n.º 2
0
def write_reslut(features, threshold, diff_sequence_file, _same_sequence_file):
    """Label every test pair against *threshold* and write the labels to ``test.txt``.

    Each line of *features* is split on single spaces: the first two fields,
    joined with "/", form the sample key and the remaining fields are parsed
    as floats (the feature vector).

    Every line of the two sequence files is split on single spaces; fields 0
    and 1 are sample keys. A pair is labelled "1" when
    ``distance.cosine_distnace`` of its two vectors is ``>= threshold`` and
    "0" otherwise. The label is appended to the line's fields and the fields
    are written space-separated, same-pairs first, then diff-pairs.

    :param features: path of the features text file
    :param threshold: decision threshold compared against the pair score
    :param diff_sequence_file: path of the file listing the "different" pairs
    :param _same_sequence_file: path of the file listing the "same" pairs
    :return: None
    :raises KeyError: if a pair references a key missing from *features*
    """
    # key ("<field0>/<field1>") -> feature vector as a list of floats
    samples = {}
    with open(features) as f:
        for line in f:
            fields = line.split(" ")
            samples["/".join(fields[:2])] = [float(x) for x in fields[2:]]

    result = []
    # Same-pairs are processed before diff-pairs, which fixes the output order.
    for sequence_file in (_same_sequence_file, diff_sequence_file):
        with open(sequence_file) as f:
            for line in f:
                # Drop the trailing newline so it neither ends up inside the
                # last field nor in the middle of the written line.
                pair = line.strip("\n").split(" ")
                d = distance.cosine_distnace(samples[pair[0]], samples[pair[1]])
                # The label is stored as text because the row is joined with
                # str.join below, which rejects non-string items.
                pair.append("1" if d >= threshold else "0")
                result.append(pair)

    # The file must be opened for writing; the default mode is read-only.
    with open("test.txt", "w") as f:
        for row in result:
            f.write("%s\n" % " ".join(row))
def ordinary_predict_two_sample(source1,
                                source2,
                                caffemodel,
                                deploy_file,
                                dimension=150,
                                IMAGE_SIZE=227,
                                gpu_mode=True,
                                LAST_LAYER_NAME="ip1"):
    """Forward two samples through a Caffe net and print the distance of their features.

    Both sources are loaded with ``preprocess.readManyDicom`` into one batch
    of shape ``(2, dimension, IMAGE_SIZE, IMAGE_SIZE)``, the batch is scaled
    and fed to the net's ``data`` blob, and the two rows of the
    *LAST_LAYER_NAME* output are compared with ``distance.cosine_distnace``.
    The result is printed, not returned.

    :param source1: first sample, passed to ``preprocess.readManyDicom``
    :param source2: second sample, passed to ``preprocess.readManyDicom``
    :param caffemodel: trained weights given to ``caffe.Net``
    :param deploy_file: network definition given to ``caffe.Net``
    :param dimension: second axis of the input batch
    :param IMAGE_SIZE: size of the last two axes of the input batch
    :param gpu_mode: select Caffe GPU mode when true, CPU mode otherwise
    :param LAST_LAYER_NAME: key of the forward output used as the feature
    :return: None
    """
    select_mode = caffe.set_mode_gpu if gpu_mode else caffe.set_mode_cpu
    select_mode()
    net = caffe.Net(deploy_file, caffemodel, caffe.TEST)

    # Slot 0 holds source1, slot 1 holds source2.
    batch = np.zeros((2, dimension, IMAGE_SIZE, IMAGE_SIZE))
    for slot, source in enumerate((source1, source2)):
        batch[slot, :, :, :] = preprocess.readManyDicom(source=source,
                                                        IMAGE_SIZE=IMAGE_SIZE,
                                                        dimension=dimension)

    # only for test LeNet (0.00390625 == 1 / 256)
    net.blobs['data'].data[...] = batch * 0.00390625

    features = net.forward()[LAST_LAYER_NAME]
    print(distance.cosine_distnace(features[0], features[1]))
Ejemplo n.º 4
0
def read_file_and_output_accuracy(features_file):
    """Search for the threshold with the best pair accuracy and print it.

    Each line of *features_file* is split on single spaces: the first two
    fields, joined with "/", form the sample key and the remaining fields are
    parsed as floats (the feature vector).

    Pairs are read from ``../data/test_same_sequence.txt`` and
    ``../data/test_diff_sequence.txt``; the diff list is truncated to the
    length of the same list. A same-pair counts as correct when its
    ``distance.cosine_distnace`` value is ``>= threshold``, a diff-pair when
    it is ``< threshold``. Thresholds are scanned from 0.99999 up to just
    past 1.0 in steps of 1e-11; on ties the last (largest) threshold wins.

    Prints ``features_file``, the best threshold and the best accuracy.

    :param features_file: path of the features text file
    :return: None
    """
    with open(features_file) as handle:
        feature_lines = handle.readlines()

    # key ("<field0>/<field1>") -> feature vector as a list of floats
    samples = {}
    for line in feature_lines:
        fields = line.split(" ")
        vector = [float(value) for value in fields[2:]]
        samples["/".join(fields[:2])] = vector

    with open("../data/test_diff_sequence.txt") as handle:
        diff_lines = handle.readlines()
    with open("../data/test_same_sequence.txt") as handle:
        same_lines = handle.readlines()

    same_pairs = [line.split(" ") for line in same_lines]
    # Use no more diff-pairs than there are same-pairs.
    diff_pairs = [line.split(" ") for line in diff_lines[:len(same_pairs)]]

    step = 0.00000000001
    thresholds = pylab.arange(0.99999, 1.0 + step, step)

    same_distance = np.array([
        distance.cosine_distnace(samples[pair[0]], samples[pair[1]])
        for pair in same_pairs
    ])
    diff_distance = np.array([
        distance.cosine_distnace(samples[pair[0]], samples[pair[1]])
        for pair in diff_pairs
    ])
    total = len(same_pairs) + len(diff_pairs)

    best_accuracy = 0
    best_threshold = 0
    for threshold in thresholds:
        same_hits = np.sum(same_distance >= threshold)
        diff_hits = np.sum(diff_distance < threshold)
        current = float(same_hits + diff_hits) / total
        if current < best_accuracy:
            continue
        # ">=" semantics: a later threshold with equal accuracy replaces the
        # earlier one.
        best_accuracy = current
        best_threshold = threshold

    print(features_file, best_threshold, best_accuracy)
Ejemplo n.º 5
0
def plot_accuracy(features_source, sequence_source):
    '''
    Plot the threshold-accuracy curve for randomly drawn sample pairs.

    Every line of the features file is split on single spaces; field 0 is the
    value compared to decide whether two samples form a "same" or "diff"
    pair, and fields 2 onward are parsed as floats (the feature vector).

    ``int(totals / 2)`` distinct same-pairs and as many distinct diff-pairs
    are drawn at random (``totals`` is not defined in this function and must
    exist at module level). One ``MyThread`` per threshold in [-1.0, 1.0]
    (step 0.01) is started; the accuracies are then read from
    ``accuracy.dic``, sorted by key and plotted, with the maximum marked.

    :param features_source: the features txt
    :param sequence_source: the  sequence text (currently unused: the pairs
        are generated randomly instead of being read from this file)
    :return: None
    '''
    # read features from features.txt
    with open(features_source) as f:
        features = [line.strip("\n").split(" ") for line in f.readlines()]

    def _vector(index):
        # Feature vector of sample `index` as a float array.
        return np.array(list(map(float, features[index][2:])))

    # Temporary generate sequence
    ###################
    # Dicts are used as insertion-ordered sets of (index, index) pairs.
    _same = {}
    _diff = {}
    last_index = len(features) - 1
    for i in range(int(totals/2)):
        # Draw until an unseen pair whose field 0 values match is found.
        while True:
            x1 = random.randint(0, last_index)
            x2 = random.randint(0, last_index)
            if (x1, x2) not in _same and features[x1][0] == features[x2][0]:
                _same[(x1, x2)] = ''
                break
        # Draw until an unseen pair whose field 0 values differ is found.
        while True:
            x1 = random.randint(0, last_index)
            x2 = random.randint(0, last_index)
            if (x1, x2) not in _diff and features[x1][0] != features[x2][0]:
                _diff[(x1, x2)] = ''
                break
        print(i)
    ###################
    #### get the distances
    _same_distance = []
    _diff_distance = []
    for pair in _same:
        s1, s2 = pair
        _same_distance.append(
            distance.cosine_distnace(_vector(s1), _vector(s2)))
        print(pair)
    for pair in _diff:
        s1, s2 = pair
        pair_distance = distance.cosine_distnace(_vector(s1), _vector(s2))
        print(pair)
        _diff_distance.append(pair_distance)
    print("get the distances complete!")
    ####################
    x_values = pylab.arange(-1.0, 1.01, 0.01)
    threads = [MyThread(threshold, _same_distance, _diff_distance)
               for threshold in x_values]
    for t in threads:
        t.start()
    # Wait for every worker to finish instead of sleeping a fixed 500 s:
    # a fixed sleep wastes time when the workers are fast and reads
    # incomplete results when they are slow.
    # NOTE(review): assumes MyThread subclasses threading.Thread (it is
    # started with .start()) and therefore provides join() - confirm.
    for t in threads:
        t.join()
    # NOTE(review): accuracy.dic is presumably filled by the MyThread workers
    # with threshold -> accuracy entries - verify against MyThread.
    acc = sorted(accuracy.dic.items(), key=lambda item: item[0])
    y_values = [value for _, value in acc]
    max_index = np.argmax(y_values)
    plt.title("threshold-accuracy curve")
    plt.xlabel("threshold")
    plt.ylabel("accuracy")
    plt.plot(x_values, y_values)
    plt.plot(x_values[max_index], y_values[max_index], '*', color='red', label="(%s, %s)"%(x_values[max_index], y_values[max_index]))
    plt.legend()
    plt.show()