Example #1
0
def test_prec_recall():
	"""Print precision, recall and interpolated precision for two sample label lists.

	Each hard-coded list of "R"/"F" labels is passed to
	``utils.precision_recall``; the resulting precision is then passed to
	``utils.interpolate_p`` and all three values are printed.
	"""
	samples = (
		["R", "F", "R", "F", "F", "F", "F", "F", "R", "R"],
		["F", "R", "F", "F", "R", "R", "R", "F", "F", "F"],
	)

	for labels in samples:
		p_values, r_values = utils.precision_recall(labels)
		smoothed_p = utils.interpolate_p(p_values)
		print("precision = {0}".format(p_values))
		print("recall = {0}".format(r_values))
		print("interpolated_p = {0}".format(smoothed_p))
Example #2
0
def show_result(sess):
    """Evaluate the generator on random batches and print averaged metrics.

    For each of ``num_of_vis_batch`` iterations a random batch of scene files
    is fetched from ``train_directory``, loaded with ``np.load`` and cut to
    ``scene_shape``. Every scene is split along its last axis into the network
    input (the first ``halfed_scene_shape`` slices) and the ground truth (the
    remaining slices). The input is run through ``ConvNet_class.generator``,
    the arg-max class per voxel is compared with the ground truth via
    ``utils.precision_recall``, and ``accuFun`` supplies two accuracy values.
    The per-batch averages of all four metrics are printed at the end.

    NOTE(review): despite the "Creating ply files..." banner, nothing in this
    function writes a PLY file.

    Args:
        sess: session object whose ``run`` method evaluates the generator.
    """
    print("Creating ply files...")

    precision = np.zeros(classes_count)
    recall = np.zeros(classes_count)
    accu1_all, accu2_all = 0.0, 0.0

    for _ in range(num_of_vis_batch):
        test_data = utils.fetch_random_batch(train_directory, batch_size)

        batch_arr = [
            utils.npy_cutter(np.load(test), scene_shape) for test in test_data
        ]
        bs = len(batch_arr)  # number of scenes actually loaded

        batch_arr = np.reshape(
            batch_arr, (bs, scene_shape[0], scene_shape[1], scene_shape[2]))
        trData = batch_arr[:, 0:scene_shape[0], 0:scene_shape[1],
                           0:halfed_scene_shape]  # input
        trLabel = batch_arr[:, 0:scene_shape[0], 0:scene_shape[1],
                            halfed_scene_shape:scene_shape[2]]  # gt
        # The generator is fed one flat vector per scene.
        trData = np.reshape(
            trData, (-1, scene_shape[0] * scene_shape[1] * halfed_scene_shape))

        score = sess.run(ConvNet_class.generator,
                         feed_dict={
                             x: trData,
                             keepProb: 1.0,
                             phase: False
                         })
        score = np.reshape(score, (-1, scene_shape[0], scene_shape[1],
                                   halfed_scene_shape, classes_count))
        # Collapse the class axis to the index of the highest score.
        score = np.argmax(score, 4)
        score = np.reshape(
            score, (-1, scene_shape[0], scene_shape[1], halfed_scene_shape))
        # NOTE(review): batch_size is passed here while accuFun below receives
        # the counted size bs -- confirm the two are always equal.
        pre, rec = utils.precision_recall(score, trLabel, batch_size,
                                          classes_count)
        precision += pre
        recall += rec

        accu1, accu2 = accuFun(sess, trData, trLabel, bs)
        accu1_all += accu1
        accu2_all += accu2
        logging.info("A1: %g, A2: %g" % (accu1, accu2))
        print("A1: %g, A2: %g" % (accu1, accu2))

    # Single-argument print(...) behaves the same as a statement on Python 2
    # and as a function call on Python 3.
    print(precision / num_of_vis_batch * 1.0)
    print(recall / num_of_vis_batch * 1.0)
    print(accu1_all / num_of_vis_batch * 1.0)
    print(accu2_all / num_of_vis_batch * 1.0)
Example #3
0
    def eval(self,
             query,
             retrieval,
             similarity_matrix,
             query_modal,
             retrieval_modal,
             dis_metric,
             radius=None):
        """Encode the query and retrieval sets and score the retrieval quality.

        Encoder variables are restored from ``'saved_model/model.ckpt'``. Each
        input set is encoded with the image encoder when its modality is
        ``'img'`` and with the text encoder otherwise, with keep-probability
        1.0 and the training flag off.

        Args:
            query: samples fed to the encoder selected by ``query_modal``.
            retrieval: samples fed to the encoder selected by
                ``retrieval_modal``.
            similarity_matrix: forwarded unchanged to the metric functions.
            query_modal: ``'img'`` selects the image encoder; any other value
                selects the text encoder.
            retrieval_modal: same convention as ``query_modal``.
            dis_metric: metric name forwarded to the scoring functions;
                ``'hash'`` additionally triggers the cosine comparison run.
            radius: not used by this method.

        Returns:
            ``(MAP, top_k_precision)``, or, when ``dis_metric == 'hash'``,
            ``(MAP, top_k_precision, MAP_comp, top_k_precision_comp,
            precision, recall)``.
        """
        cutoffs = (10, 20, 50, 100, 500)

        def encode(session, samples, modal):
            # Choose the latent tensor and its input placeholder by modality.
            if modal == 'img':
                fetch, placeholder = self.image_latent, self.image_ph
            else:
                fetch, placeholder = self.text_latent, self.text_ph
            feed = {
                placeholder: samples,
                self.keep_prob_ph: 1.0,
                self.training_ph: False
            }
            return session.run(fetch, feed_dict=feed)

        encoder_vars = self.image_encoder_vars + self.text_encoder_vars
        saver = tf.train.Saver(var_list=encoder_vars)

        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True

        with tf.Session(config=session_config) as sess:
            saver.restore(sess, 'saved_model/model.ckpt')
            query_latent = encode(sess, query, query_modal)
            retrieval_latent = encode(sess, retrieval, retrieval_modal)

        MAP = optimized_mAP(query_latent,
                            retrieval_latent,
                            similarity_matrix,
                            dis_metric=dis_metric)
        # A fresh list is passed on every call, as in the original code.
        top_k_precision = precision_top_k(query_latent, retrieval_latent,
                                          similarity_matrix,
                                          list(cutoffs), dis_metric)

        if dis_metric != 'hash':
            return MAP, top_k_precision

        # Hash codes are additionally scored with the cosine metric.
        MAP_comp = optimized_mAP(query_latent,
                                 retrieval_latent,
                                 similarity_matrix,
                                 dis_metric='cosine')
        top_k_precision_comp = precision_top_k(query_latent,
                                               retrieval_latent,
                                               similarity_matrix,
                                               list(cutoffs),
                                               dis_metric='cosine')
        precision, recall = precision_recall(query_latent,
                                             retrieval_latent,
                                             similarity_matrix)

        return (MAP, top_k_precision, MAP_comp, top_k_precision_comp,
                precision, recall)
Example #4
0
def _evaluate_classifier(name, make_classifier, X, y, random_states,
                         show_progress=False):
    """Fit and score one classifier over repeated splits, then print a summary.

    For every seed in ``random_states`` the data is split 80/20 with
    ``train_test_split``, a fresh classifier from ``make_classifier`` is
    fitted on the training part and scored on the test part. The first
    feature column is removed before fitting/predicting, and only column 0 of
    ``y`` is used as the class label.

    Args:
        name: label printed as the header of the summary.
        make_classifier: zero-argument callable returning a new, unfitted
            classifier.
        X: feature matrix.
        y: target matrix; column 0 is the label, the full rows are passed to
            ``utils.pos_error``.
        random_states: seeds, one per train/test split.
        show_progress: when True, print the split index before each split.

    Returns:
        Tuple ``(errors, overall_pre_mean, top10_pre_mean, top10_recall_mean,
        top10_f_mean)`` where ``errors`` holds one ``utils.pos_error`` result
        per split and the remaining items are the averaged metrics that were
        printed.
    """
    start = datetime.datetime.now()
    errors = []
    overall_pres = []
    top10_pres = []
    top10_recalls = []
    top10_fs = []
    for i, seed in enumerate(random_states):
        if show_progress:
            print(i)
        # Split into training and validation sets.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=seed)

        clf = make_classifier()
        y_pred = clf.fit(np.delete(X_train, 0, axis=1),
                         y_train[:, 0]).predict(np.delete(X_test, 0, axis=1))
        overall_pre, top10_pre, top10_recall, top10_f = utils.precision_recall(
            y_test[:, 0], y_pred)
        overall_pres.append(overall_pre)
        top10_pres.append(top10_pre)
        top10_recalls.append(top10_recall)
        top10_fs.append(top10_f)
        errors.append(utils.pos_error(y_test, y_pred))

    overall_pre_mean = np.mean(np.array(overall_pres))
    top10_pre_mean = np.array(top10_pres).mean(axis=0).mean()
    top10_recall_mean = np.array(top10_recalls).mean(axis=0).mean()
    top10_f_mean = np.array(top10_fs).mean(axis=0).mean()

    print(name)
    print("Overall precision: %.3f" % overall_pre_mean)
    print("Top10 precision: %.3f" % top10_pre_mean)
    print("Top10 recall: %.3f" % top10_recall_mean)
    print("Top10 f-measurement: %.3f" % top10_f_mean)
    print("Median error: {}".format(
        np.percentile(np.array(errors).mean(axis=0), 50)))
    print("Time spend: {}".format(datetime.datetime.now() - start))
    print("****************************")
    return (errors, overall_pre_mean, top10_pre_mean, top10_recall_mean,
            top10_f_mean)


def main():
    """Compare seven classifiers on the grid data and plot the results.

    The data is loaded through ``utils.gongcan_to_ll`` and
    ``utils.ll_to_grid``, the ID columns are dropped and missing values are
    filled with 0. Each classifier is evaluated on the same ten seeded splits;
    its summary is printed and the collected metrics are finally passed to
    ``utils.cdf_figure`` and ``utils.figure``.
    """
    ll_data_2g = utils.gongcan_to_ll()
    train_data = utils.ll_to_grid(ll_data_2g)

    # Drop the original IDs; they are not used as training features.
    for i in range(1, 8):
        train_data.drop(['RNCID_' + str(i)], axis=1, inplace=True)
        train_data.drop(['CellID_' + str(i)], axis=1, inplace=True)
    # Fill missing signal strengths with 0.
    train_data = train_data.fillna(0)

    # Features and labels. ``.values`` replaces ``DataFrame.as_matrix()``,
    # which was removed in pandas 1.0.
    X = train_data.drop(
        ['MRTime', 'Longitude', 'Latitude', 'Num_connected', 'grid_num'],
        axis=1,
        inplace=False).values
    y = train_data[['grid_num', 'Longitude', 'Latitude']].values
    # Fixed seeds so that every classifier sees exactly the same splits.
    random_states = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

    # (printed name, classifier factory, print split index while running)
    classifiers = [
        ("Gaussian", GaussianNB, False),
        ("KNeighbors", KNeighborsClassifier, False),
        ("DecisionTree", DecisionTreeClassifier, False),
        ("RandomForest",
         lambda: RandomForestClassifier(max_depth=20, random_state=0),
         False),
        ("AdaBoost",
         lambda: AdaBoostClassifier(
             base_estimator=DecisionTreeClassifier(max_depth=20),
             learning_rate=0.01,
             n_estimators=30,
             algorithm='SAMME.R'),
         False),
        ("Bagging", lambda: BaggingClassifier(n_estimators=20), False),
        # The GradientBoosting run is the only one that prints the split index.
        ("GradientBoosting",
         lambda: GradientBoostingClassifier(n_estimators=60,
                                            learning_rate=0.01),
         True),
    ]

    errors_all = []
    top10_pres_all = []
    top10_recalls_all = []
    top10_fs_all = []
    overall_pres_all = []

    for name, make_classifier, show_progress in classifiers:
        errors, overall_pre, top10_pre, top10_recall, top10_f = \
            _evaluate_classifier(name, make_classifier, X, y, random_states,
                                 show_progress=show_progress)
        errors_all.append(errors)
        top10_recalls_all.append(top10_recall)
        top10_pres_all.append(top10_pre)
        overall_pres_all.append(overall_pre)
        top10_fs_all.append(top10_f)

    utils.cdf_figure(errors_all)
    utils.figure(overall_pres_all, top10_pres_all, top10_recalls_all,
                 top10_fs_all)
Example #5
0
        model.load_weights(options.model_weights)
        opt = keras.optimizers.Adam(float(options.lr))
        model.compile(loss='binary_crossentropy',
                      optimizer=opt,
                      metrics=['accuracy'])
        raise KeyboardInterrupt

    print 'Loss and Validations history stored in ', options.outputFile + '_' + curr_time
    #model.save_weights('../Output/ModelParams/LSTM_params_BiDi_%s.h5'%curr_time)
    print 'Best Model parameters stored at ../Output/ModelParams/LSTM_BiDi_%s.h5' % curr_time
    mfalseLength, mactualLength = utils.analyze_false(validData,
                                                      validDataNumbers,
                                                      validLabels, model)
    c1p, c1r, c0p, c0r, acc, c1f, c0f = utils.precision_recall(
        validDataNumbers,
        validLabels,
        model,
        weightsPath=options.outputWeights)
    print 'Run %d results :-' % (run + 1)
    scores['c1_precision'].append(c1p)
    scores['c1_recall'].append(c1r)
    scores['c0_precision'].append(c0p)
    scores['c0_recall'].append(c0r)
    scores['accuracy'].append(acc)
    scores['c1_fscore'].append(c1f)
    scores['c0_fscore'].append(c0f)
    scores['mean_actual_length'].append(mactualLength)
    scores['mean_false_length'].append(mfalseLength)
    scores['sample_info'].append(
        (len(trainData), pos_train_samples, len(validData), pos_valid_samples))
		curr_time =  datetime.datetime.strftime(datetime.datetime.now(), '%dth-%H:%M:%S')
		with open(options.outputFile+'_'+curr_time + '.pkl','w') as f:
			json.dump(Hist.history,f)

	elif os.path.exists(options.model_weights):
		model.load_weights(options.model_weights)
		opt = keras.optimizers.Adam(float(options.lr))
		model.compile(loss = 'binary_crossentropy',optimizer = opt,metrics = ['accuracy'] )
		raise KeyboardInterrupt


	print 'Loss and Validations history stored in ',options.outputFile+'_'+curr_time
	#model.save_weights('../Output/ModelParams/LSTM_params_BiDi_%s.h5'%curr_time)
	print 'Best Model parameters stored at ../Output/ModelParams/LSTM_BiDi_%s.h5' %curr_time
	mfalseLength,mactualLength = utils.analyze_false(validData,validDataNumbers,validLabels,model)
	c1p,c1r,c0p,c0r,acc,c1f,c0f = utils.precision_recall(validDataNumbers,validLabels,model,weightsPath = options.outputWeights)
	print 'Run %d results :-' %(run+1)
	scores['c1_precision'].append(c1p)
	scores['c1_recall'].append(c1r)
	scores['c0_precision'].append(c0p)
	scores['c0_recall'].append(c0r)
	scores['accuracy'].append(acc)
	scores['c1_fscore'].append(c1f)
	scores['c0_fscore'].append(c0f)
	scores['mean_actual_length'].append(mactualLength)
	scores['mean_false_length'].append(mfalseLength)
	scores['sample_info'].append((len(trainData),pos_train_samples,len(validData),pos_valid_samples))

# Build the run summary header: the current timestamp followed by the
# hyper-parameters read from `options`.
results_info = curr_time + '\tHyperParameters:- \nWord-Index Dictionary : %s \tWordvectors file : %s \tLearning Rate : %f \t split-ratio : %f \tEpochs : %d \tOutput Weights : %s \tResults File : %s\n Neurons : %s \nModel-Layers: %s\nResults averaged over %d runs' %(options.vec_dict, options.pretrained, float(options.lr), float(options.split), int(options.nEpochs),options.outputWeights, options.outputFile,str(options.neurons),str(options.nLayers),int(options.runs))
# NOTE(review): this appends to `results`, not to `results_info` built above;
# `results` is not defined in the visible code -- confirm which name is intended.
results += '\nLabel : %s' %options.label
Example #7
0
def main():
    """Run the face-comparison pipeline from face selection to the final report.

    Steps, in order: choose how many faces to scan, build the encodings,
    compare them, draw the graphs, compute precision/recall, output the most
    similar different people and most different same faces, combine the face
    images, plot the first-names wordcloud, and write the run outputs.
    """
    run_started_at = datetime.datetime.now()

    # The user may restrict the comparison to a subset of the faces.
    selection = get_number_faces_to_scan(lfw_path, run_started_at)
    people_count, attempting_all, file_str_prefix, faces_to_scan = selection

    # Encodings dataset.
    all_encodings, encodings_started_at, lists_of_images = encodings_builder(
        lfw_path, people_count, faces_to_scan, IMAGES_TO_EXCLUDE)

    # Encoding comparison.
    comparison = encodings_comparer(all_encodings)
    same_df, different_df, comparisons_started_at, comparison_counter = comparison

    # Graphs.
    graphs_started_at = all_graphs(
        same_df,
        different_df,
        comparison_counter,
        lists_of_images,
        file_str_prefix,
        doing_graphs,
        CUMULATIVE_GRAPHS,
    )

    # Precision and recall.
    precision_recall_started_at = precision_recall(
        same_df, different_df, file_str_prefix, doing_precision_recall)

    # Lookalikes and different-looking images of the same person.
    sorted_frames = output_most_similar_different_people_and_most_different_same_faces(
        different_df, same_df, file_str_prefix)
    different_df_sorted, same_df_sorted = sorted_frames

    # Combined images for both sorted sets.
    combine_face_images(different_df_sorted, file_str_prefix,
                        "_8_lookalikes.jpg")
    combine_face_images(same_df_sorted, file_str_prefix,
                        "_9_different_looking_same_people.jpg")

    # First-names wordcloud.
    plot_first_names_wordcloud(file_str_prefix, lists_of_images)

    # Timings and information about images that failed.
    run_outputs(
        attempting_all,
        run_started_at,
        encodings_started_at,
        comparisons_started_at,
        graphs_started_at,
        precision_recall_started_at,
        file_str_prefix,
        lists_of_images,
    )
Example #8
0
    def train(self, tr_X, tr_y, te_X, te_y, batchSize=32, maxIter=50,
              start=10, period=2, threshold=10, earlyStopTol=2, totalStopTol=2):
        trainfn = self.trainfn
        lr = self.lr
        tr_va_split = int(tr_X.shape[0] * 0.7)
        tr_X, va_X = tr_X[:tr_va_split], tr_X[tr_va_split:]
        tr_y, va_y = tr_y[:tr_va_split], tr_y[tr_va_split:]

        earlyStop = earlyStopGen(start, period, threshold, earlyStopTol)
        earlyStop.next()  # 初始化生成器
        totalStopCount = 0
        for epoch in xrange(maxIter):  # every epoch
            # In each epoch, we do a full pass over the training data:
            trAllPred = None
            trRandy = None
            trCostSum = 0.
            startTime = time.time()
            for batch in miniBatchGen(tr_X, tr_y, batchSize, shuffle=True):
                Xb, yb = batch
                trCost, trPred = trainfn(Xb, yb)
                trCostSum += trCost
                trAllPred = np.concatenate((trAllPred, trPred), axis=0) \
                    if trAllPred is not None else trPred
                trRandy = np.concatenate((trRandy, yb)) if trRandy is not None else yb
            trIter = len(tr_X) // batchSize
            if len(tr_X) % batchSize != 0: trIter += 1
            trCostMean = trCostSum / trIter
            trAcc = accuracy(trAllPred, trRandy)
            trP, trR = precision_recall(trAllPred, trRandy)
            # And a full pass over the validation data:
            vaAllPred = None
            vaCostSum = 0.
            for batch in miniBatchGen(va_X, va_y, batchSize, shuffle=False):
                Xb, yb = batch
                vaCost, vaPred = self.vatefn(Xb, yb)
                vaCostSum += vaCost
                vaAllPred = np.concatenate((vaAllPred, vaPred), axis=0) \
                    if vaAllPred is not None else vaPred
            vaIter = len(va_X) // batchSize
            if len(va_X) % batchSize != 0: vaIter += 1
            vaCostMean = vaCostSum / vaIter
            vaAcc = accuracy(vaAllPred, va_y)
            vaP, vaR = precision_recall(vaAllPred, va_y)
            print 'epoch ', epoch, ' time: %.3f' % (time.time() - startTime),
            print 'trcost: %.5f  tracc: %.5f  trp: %.5f  trr: %.5f' % (trCostMean, trAcc, trP, trR),
            print 'vacost: %.5f  vaacc: %.5f  vap: %.5f  var: %.5f' % (vaCostMean, vaAcc, vaP, vaR)
            # Then we decide whether to early stop:
            if earlyStop.send((trCostMean, vaCostMean)):
                lr /= 10  # 如果一次早停止发生,则学习率降低继续迭代
                updatesDict = updates.nesterov_momentum(self.trCost, self.params, lr, self.momentum)
                trainfn = makeFunc([self.X, self.y], [self.trCost, self.yDropProb], updatesDict)
                totalStopCount += 1
                if totalStopCount > totalStopTol:  # 如果学习率降低仍然发生早停止,则退出迭代
                    print 'stop'
                    break
                print 'learning rate decreases to ', lr
        ################################################################################################################
        self.istrained = True
        params = layers.get_all_param_values(self.outprob)
        cPickle.dump(params, open(dataset_path + 'plain_cnn.pkl', 'w'))
        ################################################################################################################
        teAllPred = None
        teCostSum = 0.
        for batch in miniBatchGen(te_X, te_y, batchSize, shuffle=False):
            Xb, yb = batch
            teCost, tePred = self.vatefn(Xb, yb)
            teCostSum += teCost
            teAllPred = np.concatenate((teAllPred, tePred), axis=0) \
                if teAllPred is not None else tePred
        teIter = len(te_X) // batchSize
        if len(te_X) % batchSize != 0: teIter += 1
        teCostMean = teCostSum / teIter
        teAcc = accuracy(teAllPred, te_y)
        teP, teR = precision_recall(teAllPred, te_y)
        print 'tecost: %.5f  teacc: %.5f  tep: %.5f  ter: %.5f' % (teCostMean, teAcc, teP, teR)
Example #9
0
        y_hat = model(x)
        loss = K.binary_crossentropy(y, y_hat)

        # Weight the loss
        pos_weights = y * pos_weight
        neg_weights = (1.0 - y) * neg_weight
        loss_weights = pos_weights + neg_weights

        loss_weighted = tf.reduce_mean(loss * loss_weights)

        grads = tape.gradient(loss_weighted, model.weights)
        optimizer.apply_gradients(zip(grads, model.weights))

        acc = utils.accuracy(y, y_hat)
        acc_topk = utils.accuracy_topk(y, y_hat)
        precision, recall = utils.precision_recall(y, y_hat)
        auc_metric = tf.keras.metrics.AUC()
        auc_metric.update_state(y, y_hat)
        tf.summary.image('Inputs', x, step=step)
        tf.summary.scalar('ClassLoss', loss_weighted, step=step)
        tf.summary.scalar('Acc', acc, step=step)
        tf.summary.scalar('AuC', auc_metric.result(), step=step)
        tf.summary.scalar('AccTopK', acc_topk, step=step)
        tf.summary.scalar('Precision', precision, step=step)
        tf.summary.scalar('Recall', recall, step=step)
        tf.summary.histogram('Labels', y, step=step)
        tf.summary.histogram('Predictions', y_hat, step=step)
        auc_metric.reset_states()

        print('Step: ', step,
              acc.numpy() * 100, precision.numpy(), recall.numpy(),
Example #10
0
def validation(x_valid, y_valid, val_batch_size, num_classes, sess, model, epoch, start_time, w_plus):
    """Run one validation pass, print a per-class report and persist the metrics.

    The validation set is shuffled with ``randomize`` and evaluated in full
    batches of ``val_batch_size``; a trailing partial batch is not evaluated.
    Per-batch accuracies and losses are averaged, per-class precision/recall is
    computed with ``precision_recall``, and the results are handed to
    ``plot_precision_recall_curve``, ``write_acc_loss_csv`` and
    ``write_precision_recall_csv``.

    Args:
        x_valid: Validation inputs, sliced per batch by ``get_next_batch``.
        y_valid: Validation labels, indexed as a 2-D array with one column
            per class.
        val_batch_size: Number of samples per evaluated batch.
        num_classes: Number of label columns / model outputs.
        sess: Session object whose ``run`` evaluates the model tensors.
        model: Model exposing ``x``, ``y``, ``w_plus``, ``accuracy``, ``loss``,
            ``prediction`` and ``get_logits``; its ``is_train`` flag is set to
            ``False`` here.
        epoch: Zero-based epoch index (printed as ``epoch + 1``).
        start_time: Reference timestamp (``time.time()`` units) used to report
            the elapsed run time.
        w_plus: Value fed to ``model.w_plus``.

    Returns:
        Tuple ``(mean_acc, mean_loss)``: the per-class accuracy averaged over
        batches and the mean batch loss.
    """
    model.is_train = False
    x_valid, y_valid = randomize(x_valid, y_valid)
    step_count = len(x_valid) // val_batch_size

    # Collect per-batch results in lists and join them once after the loop
    # instead of re-allocating the arrays on every step.  Each list is seeded
    # with an empty float array so a zero-step run still yields (0, num_classes)
    # arrays and the dtype promotion matches a float accumulator.
    empty = np.zeros((0, num_classes))
    acc_chunks = [empty]
    pred_chunks = [empty]
    logit_chunks = [empty]
    batch_losses = []

    for step in range(step_count):
        start = step * val_batch_size
        end = start + val_batch_size
        x_batch, y_batch = get_next_batch(x_valid, y_valid, start, end)

        feed_dict_val = {model.x: x_batch, model.y: y_batch, model.w_plus: w_plus}
        acc_valid, loss_valid, y_pred, logits = sess.run(
            [model.accuracy, model.loss, model.prediction, model.get_logits],
            feed_dict=feed_dict_val)

        acc_chunks.append(acc_valid.reshape([1, num_classes]))
        pred_chunks.append(y_pred.reshape([val_batch_size, num_classes]))
        logit_chunks.append(logits.reshape([val_batch_size, num_classes]))
        batch_losses.append(loss_valid)

    acc_batch_all = np.concatenate(acc_chunks)
    y_pred_all = np.concatenate(pred_chunks)
    logits_all = np.concatenate(logit_chunks)
    loss_batch_all = np.append(np.array([]), batch_losses)

    mean_acc = np.mean(acc_batch_all, axis=0)
    mean_loss = np.mean(loss_batch_all)
    # NOTE: label counts cover the whole validation set, prediction counts only
    # the samples that were actually evaluated (full batches).
    num_examples = np.sum(y_valid, axis=0)
    num_preds = np.sum(y_pred_all, axis=0)
    epoch_time = time.time() - start_time

    # Every report row is assembled into one string and printed with a single
    # call, which produces the same text as trailing-comma print statements
    # while remaining valid syntax on both Python 2 and Python 3.
    print('******************************************************************************'
          '********************************************************')
    print('--------------------------------------------------------Validation, Epoch: {}'
          ' -----------------------------------------------------------'.format(epoch + 1))
    print("Atlc\tCrdmg\tEffus\tInflt\tMass\tNodle\tPnum\tPntrx\tConsd"
          "\tEdma\tEmpys\tFbrss\tTkng\tHrna\t|Avg.\t|Loss\t|Run Time")
    print(''.join('{:.01%}\t'.format(accu) for accu in mean_acc)
          + '|{0:.01%}\t|{1:0.02}\t|{2}'.format(np.mean(mean_acc), mean_loss, epoch_time))

    print(''.join('{:}\t'.format(exm) for exm in num_examples) + "Count of pathalogies")
    print(''.join('{:}\t'.format(pred) for pred in num_preds) + "Count of recognized pathalogies")

    # Labels restricted to the evaluated samples so they line up row-for-row
    # with the collected predictions/logits when the last partial batch was
    # dropped.
    y_eval = y_valid[:y_pred_all.shape[0]]

    # Precision and recall need separate buffers: binding both names to one
    # array would make each recall value overwrite the precision stored in the
    # same slot.
    P = np.zeros(num_classes)
    R = np.zeros(num_classes)
    for cond in range(num_classes):
        P[cond], R[cond] = precision_recall(y_eval[:, cond], y_pred_all[:, cond])

    print(''.join('{:0.03}\t'.format(p) for p in P) + "Precision")
    print(''.join('{:0.03}\t'.format(r) for r in R) + "Recall")

    plot_precision_recall_curve(y_eval, logits_all, epoch)
    write_acc_loss_csv(mean_acc, mean_loss, epoch)
    write_precision_recall_csv(P, R, epoch)

    return mean_acc, mean_loss