Example #1
def load_train_val_data(parser):
    print("load_train_val_data ... ")
    # Load and preprocess the training/validation sets (this block was
    # commented out in the original, but model.fit below needs these arrays).
    train = load.load_dataset("data/train.json")
    val = load.load_dataset("data/validation.json")
    preproc = load.preproc(*train)

    train_x, train_y = preproc.process(*train)
    val_x, val_y = preproc.process(*val)

    print("train size : {}, {}".format(len(train_x), len(train_y)))
    print("val size : {}, {}".format(len(val_x), len(val_y)))
    args = parser.parse_args()
    model = architecture.build_model()
    #print(model.summary())

    save_dir = make_save_dir("data/", "model")
    file_name = get_filename_for_saving(save_dir)
    check_pointer = keras.callbacks.ModelCheckpoint(filepath=file_name,
                                                    save_best_only=False)
    stopping = keras.callbacks.EarlyStopping(patience=10)
    # default_lr is assumed to be a module-level base learning rate.
    reduce_lr = keras.callbacks.ReduceLROnPlateau(factor=0.1,
                                                  patience=2,
                                                  min_lr=default_lr * 0.001)

    model.fit(train_x,
              train_y,
              batch_size=int(args.batchsize),
              epochs=int(args.epochs),
              validation_data=(val_x, val_y),
              callbacks=[check_pointer, stopping, reduce_lr])
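
# Usage sketch (hedged): build a parser whose flag names match the
# `args.batchsize` / `args.epochs` fields the function reads.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--batchsize", default="32")
parser.add_argument("--epochs", default="30")
load_train_val_data(parser)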
Example #2
def validation(global_weight=None):
    # Relies on module-level val_x, val_y, val_labels, prior, preproc and
    # current_round. The original default was a mutable list ([]), which
    # would make the None check below useless, so default to None instead.
    if global_weight is not None:
        model = architecture.build_model()
        model.set_weights(global_weight)

        print("===> validation start")
        m_probs = model.predict(val_x)
        committee_labels = np.argmax(val_y, axis=2)
        committee_labels = committee_labels[:, 0]

        print("===================")
        temp = []
        # Prior-corrected argmax per time step, then a per-record majority
        # vote over the valid steps (older SciPy returns arrays from mode(),
        # hence the [0][0] indexing).
        preds = np.argmax(m_probs / prior, axis=2)
        for i, j in zip(preds, val_labels):
            t = sst.mode(i[:len(j) - 1])[0][0]
            temp.append(t)
            print(i[:len(j) - 1])

        preds = temp

        print("preds : \n", preds)

        report = skm.classification_report(committee_labels,
                                           preds,
                                           target_names=preproc.classes,
                                           digits=3)
        scores = skm.precision_recall_fscore_support(committee_labels,
                                                     preds,
                                                     average=None)
        print("report : \n", report)
        # print("scores : ", scores)

        cm = confusion_matrix(committee_labels, preds)
        print("confusion matrix : \n", cm)

        f1 = f1_score(committee_labels, preds, average='micro')
        print("f1_score : ", f1)

        # ***roc_auc_score - m_probs***

        m_probs = np.sum(m_probs, axis=1)
        m_probs = m_probs / 71  # normalize by the maximum segment count per record (71)

        # print(ground_truth.shape, m_probs.shape)

        ovo_auroc = roc_auc_score(committee_labels, m_probs, multi_class='ovo')
        ovr_auroc = roc_auc_score(committee_labels, m_probs, multi_class='ovr')

        print("ovr_auroc : ", ovr_auroc)
        print("ovo_auroc : ", ovo_auroc)

        # Collect the metrics computed above for this round.
        result = {
            "f1": f1,
            "ovr_auroc": ovr_auroc,
            "ovo_auroc": ovo_auroc,
        }

        save_result(model, current_round, result)

    print("===> validation end")
Example #3
def local_train():
    # Train on the module-level train/val arrays, checkpointing each epoch
    # and stopping early when validation loss stalls for 10 epochs.
    save_dir = make_save_dir("data/", "model")
    file_name = get_filename_for_saving(save_dir)

    check_pointer = keras.callbacks.ModelCheckpoint(filepath=file_name,
                                                    save_best_only=False)
    stopping = keras.callbacks.EarlyStopping(patience=10)

    model = architecture.build_model()
    model.fit(train_x,
              train_y,
              batch_size=32,
              epochs=30,
              validation_data=(val_x, val_y),
              callbacks=[check_pointer, stopping])
    validation(model.get_weights())
Example #4
def fl_task():
    print("fl task")
    global current_round

    global_round = request_current_round()
    if global_round == current_round:
        global_weight = request_global_weight()

        if global_weight is not None:
            validation(global_weight)

        model = architecture.build_model()

        if global_weight is not None:
            model.set_weights(global_weight)

        print("==> local training start")

        print("==> local training end")
        update_local_weight(model.get_weights())
        delay_compare_weight()
        current_round += 1

    delay_compare_weight()
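
# Hedged sketch of how fl_task() might be driven: none of this scheduling
# code appears in the original snippet, and the polling interval is an
# arbitrary choice.
import threading

def poll_server(interval_sec=10):
    fl_task()
    threading.Timer(interval_sec, poll_server, args=[interval_sec]).start()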
Example #5
import os
import tensorflow as tf  # TF1-style API (tf.get_collection, tf.train.Saver)
from queue import Queue  # assumption: the standard-library Queue

replay_files = []
# r = root, d = directories, f = files
for r, d, f in os.walk(PATH):
    for file in f:
        if file.endswith('.json'):
            replay_files.append(os.path.join(r, file))
for f in replay_files:
    print(f)
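# Equivalent discovery with pathlib, for reference:
#   from pathlib import Path
#   replay_files = [str(p) for p in Path(PATH).rglob('*.json')]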
queue = [Queue(32)]
queue_m_sizes = [32]

batch_size = len(replay_files)

#exit(0)

build_model()
frames_node = tf.get_collection('frames')[0]
# can_afford_node = tf.get_collection('can_afford')[0]
turns_left_node = tf.get_collection('turns_left')[0]
my_ships_node = tf.get_collection('my_ships')[0]
moves_node = tf.get_collection('moves')[0]
spawn_node = tf.get_collection('spawn')[0]
loss_node = tf.get_collection('loss')[0]
optimizer_node = tf.get_collection('optimizer')[0]

saver = tf.train.Saver(max_to_keep=1)
# path = random.choice(replay_files)
# # Load all training data
# game = utils.Halite()
# #path = '1068739.json'
# game.load_replay(path)
Example #6
def predict(parser):
    val = load.load_dataset("data/validation_2.json")
    preproc = load.preproc(*val)

    args = parser.parse_args()
    print("args model : ", args.model)

    model = architecture.build_model()
    model.load_weights(args.model)

    with open("data/validation_2.json", "rb") as fid:
        val_labels = [json.loads(l)['labels'] for l in fid]

    counts = collections.Counter(preproc.class_to_int[l[0]]
                                 for l in val_labels)
    counts = sorted(counts.most_common(), key=lambda x: x[0])
    counts = list(zip(*counts))[1]

    print("counts : ", counts)

    smooth = 500
    counts = np.array(counts)[None, None, :]
    total = np.sum(counts) + counts.shape[1]
    print("total : ", total)
    prior = (counts + smooth) / float(total)  # additively smoothed class prior
    print("prior : ", prior)

    ecgs, committee_labels = preproc.process(*val)
    m_probs = model.predict(ecgs)

    committee_labels = np.argmax(committee_labels, axis=2)
    committee_labels = committee_labels[:, 0]

    print("===================")
    temp = []
    preds = np.argmax(m_probs / prior, axis=2)
    for i, j in zip(preds, val_labels):
        t = sst.mode(i[:len(j) - 1])[0][0]
        temp.append(t)
        #print(i[:len(j)-1])

    preds = temp

    #print("preds : \n", preds)

    report = skm.classification_report(committee_labels,
                                       preds,
                                       target_names=preproc.classes,
                                       digits=3)
    scores = skm.precision_recall_fscore_support(committee_labels,
                                                 preds,
                                                 average=None)
    print("report : \n", report)

    cm = confusion_matrix(committee_labels, preds)
    print("confusion matrix : \n", cm)

    # Renamed from `f1`: that name is reused below as a bootstrap list.
    micro_f1 = f1_score(committee_labels, preds, average='micro')
    print("f1_score : ", micro_f1)

    # ***roc_auc_score - m_probs***
    s_probs = np.sum(m_probs, axis=1)
    s_probs = s_probs / 71  # normalize by the maximum segment count per record (71)

    #ovo_auroc = roc_auc_score(committee_labels, s_probs, multi_class='ovo')
    ovr_auroc = roc_auc_score(committee_labels, s_probs, multi_class='ovr')

    print("ovr_auroc : ", ovr_auroc)
    #print("ovo_auroc : ", ovo_auroc)
    # bootstrapping: resample the predictions to get 95% confidence intervals
    n_bootstraps = 100
    np.random.seed(3033)

    total_precision = []
    total_recall = []
    total_f1 = []
    total_auroc = []

    precision = []
    recall = []
    f1 = []

    total = []

    for j in range(n_bootstraps):
        # np.random.random_integers is deprecated; randint's upper bound is
        # exclusive, so this draws the same index range.
        indices = np.random.randint(0, len(m_probs), 100)

        #print("indices : ", len(indices))

        if len(np.unique(committee_labels[indices])) < 2:
            continue

        sub_labels = []
        sub_result = []
        sub_probs = []

        #print(indices)

        for i in indices:
            sub_labels.append(committee_labels[i])
            sub_result.append(preds[i])
            sub_probs.append(m_probs[i])

        s_scores = skm.precision_recall_fscore_support(sub_labels,
                                                       sub_result,
                                                       labels=[0, 1, 2, 3],
                                                       average=None)

        # ***roc_auc_score - m_probs***
        s_p = np.sum(sub_probs, axis=1)
        s_p = s_p / 71  # one data set max size (element count) -> normalization

        # ovo_auroc = roc_auc_score(committee_labels, s_probs, multi_class='ovo')
        #print(sub_labels)
        #print(s_p)

        try:
            s_auroc = roc_auc_score(sub_labels, s_p, multi_class='ovr')
        except ValueError:  # a class can be missing from a bootstrap resample
            s_auroc = -1

        #print(s_scores)
        precision.append(np.array(s_scores[0]))
        recall.append(np.array(s_scores[1]))
        f1.append(np.array(s_scores[2]))
        #auroc.append(s_auroc)

        total_precision.append(np.average(s_scores[0]))
        total_recall.append(np.average(s_scores[1]))
        total_f1.append(np.average(s_scores[2]))
        total_auroc.append(s_auroc)

    total_precision.sort()
    total_recall.sort()
    total_f1.sort()
    total_auroc.sort()

    # Drop every failed AUROC (-1), not just the first occurrence;
    # list.remove would also raise if no -1 were present.
    total_auroc = [a for a in total_auroc if a != -1]
    #print(total_auroc)
    # When bootstrapping, a resample may be missing some classes entirely.
    precision = np.array(precision)
    precision[precision == .0] = np.nan
    recall = np.array(recall)
    recall[recall == .0] = np.nan
    f1 = np.array(f1)
    f1[f1 == .0] = np.nan

    #print(total_auroc)

    for i in range(4):
        pre = precision[:, i]
        pre.sort()
        rec = recall[:, i]
        rec.sort()
        f = f1[:, i]
        f.sort()

        pre = np.round(pre[int(len(pre) * 0.025):int(len(pre) * 0.975)], 3)
        rec = np.round(rec[int(len(rec) * 0.025):int(len(rec) * 0.975)], 3)
        f = np.round(f[int(len(f) * 0.025):int(len(f) * 0.975)], 3)  # was `pre[...]`, a copy-paste bug
        '''
        print(i,
              " : ", "{0} ({1}, {2})".format(np.round(np.nanmean(pre), 3), round(pre[0], 3), round(pre[-1], 3)),
              " : ", "{0} ({1}, {2})".format(np.round(np.nanmean(rec), 3), round(rec[0], 3), round(rec[-1], 3)),
              " : ", "{0} ({1}, {2})".format(np.round(np.nanmean(f), 3), round(f[0], 3), round(f[-1], 3)))
        '''

        item = [
            i, "{0} ({1}, {2})".format(np.round(np.nanmean(pre), 3),
                                       round(np.nanmin(pre), 3),
                                       round(np.nanmax(pre), 3)),
            "{0} ({1}, {2})".format(np.round(np.nanmean(rec), 3),
                                    round(np.nanmin(rec), 3),
                                    round(np.nanmax(rec), 3)),
            "{0} ({1}, {2})".format(np.round(np.nanmean(f), 3),
                                    round(np.nanmin(f), 3),
                                    round(np.nanmax(f), 3))
        ]

        total.append(item)

    total_auroc = np.round(
        total_auroc[int(len(total_auroc) *
                        0.025):int(len(total_auroc) * 0.975)], 3)
    total_precision = np.round(
        total_precision[int(len(total_precision) *
                            0.025):int(len(total_precision) * 0.975)], 3)
    total_recall = np.round(
        total_recall[int(len(total_recall) *
                         .025):int(len(total_recall) * .975)], 3)
    total_f1 = np.round(
        total_f1[int(len(total_f1) * .025):int(len(total_f1) * .975)], 3)

    with open(args.file_name, "w", newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["", "precision", "recall", "f1-score", "auroc"])
        writer.writerow([
            "",
            "{0} ({1}, {2})".format(np.round(np.average(scores[0]), 3),
                                    total_precision[0], total_precision[-1]),
            "{0} ({1}, {2})".format(np.round(np.average(scores[1]), 3),
                                    total_recall[0], total_recall[-1]),
            "{0} ({1}, {2})".format(np.round(np.average(scores[2]), 3),
                                    total_f1[0], total_f1[-1]),
            "{0} ({1}, {2})".format(np.round(ovr_auroc, 3), total_auroc[0],
                                    total_auroc[-1]),
        ])
        for i in total:
            writer.writerow(i)
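
# Compact restatement of the percentile-interval pattern used throughout the
# bootstrap section above: sort the bootstrap statistics and keep the central
# 95% (nothing new here, it just names the pattern).
import numpy as np

def percentile_interval(stats, alpha=0.05):
    s = np.sort(np.asarray(stats))
    lo = int(len(s) * (alpha / 2))
    hi = int(len(s) * (1 - alpha / 2))
    return s[lo:hi]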
Example #7
filepath_loss = "./data/Multi_task_model.loss"
filepath_model = "./data/checkpoint/best_model.h5"

d_nucl = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4}
f_matrix, f_labels, f_pos = get_kmer_from_50mer(filepath_train)
f_matrix_val, f_labels_val, f_pos_val = get_kmer_from_50mer(filepath_val)

params = get_params_50mer()
d_weights = get_learning_weights(filepath_weights)

training_generator = DataGenerator_from_50mer(f_matrix, f_labels, f_pos,
                                              **params)
val_generator = DataGenerator_from_50mer(f_matrix_val, f_labels_val, f_pos_val,
                                         **params)

model = build_model()
print(model.summary())
model.compile(optimizer='adam',
              loss={
                  'output1': 'categorical_crossentropy',
                  'output2': 'categorical_crossentropy'
              },
              metrics=['accuracy'])

CallBacks = [
    EarlyStopping(monitor='val_loss', patience=5),
    ModelCheckpoint(filepath=filepath_model,
                    monitor='val_loss',
                    save_best_only=True)
]
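
# The snippet ends before training starts; a typical continuation would look
# like this (hedged sketch — the epoch count is an arbitrary assumption, and
# older Keras versions would use fit_generator instead of fit):
model.fit(training_generator,
          validation_data=val_generator,
          epochs=50,
          callbacks=CallBacks)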
Example #8
params['AE epochs'] = 30
params['image res'] = 256
#params['kernel size'] = 3
params['n clusters'] = 3
params['n features'] = 64
params['output dir'] = r'/home/dykuang/UMI-SEG/output/'
params['input channels'] = 1
params['en spec'] = [8, 16, 32]  # layer widths for the U-net encoder
params['de spec'] = [8, 8, 8]    # layer widths for the decoder

#params['training data list']=['vol_1_slice_{}.npy'.format(i) for i in range(10, 121)]

AE, _ = build_model(input_size=(params['image res'], params['image res'],
                                params['input channels']),
                    en_spec=params['en spec'],
                    de_spec=params['de spec'],
                    n_features=params['n features'],
                    n_clusters=params['n clusters'])
print(AE.summary())
print(AE.layers[-2].summary())
print(AE.layers[-1].summary())


def datagen(datapath, datalist, batchsize):
    x = np.zeros([
        batchsize, params['image res'], params['image res'],
        params['input channels']
    ])
    size = len(datalist)
    n_batches = size // batchsize
    index = 0
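    # Hedged completion: the original snippet is truncated here. A typical
    # generator body would cycle through `datalist`, load each .npy slice
    # into the batch buffer, and yield (input, target) pairs; for an
    # autoencoder the target is the input itself. The single-channel file
    # layout below is an assumption.
    while True:
        for i in range(batchsize):
            x[i, ..., 0] = np.load(os.path.join(datapath, datalist[index]))
            index = (index + 1) % size
        yield x, x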
Example #9
callbacks = [
    # The opening of this list is truncated in the original snippet; it is
    # reconstructed here, and `checkpoint_path` is a hypothetical stand-in
    # for the filepath argument that was cut off.
    tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        load_weights_on_restart=False,
        save_best_only=False,
        save_freq=100,
        monitor='val_loss',
        verbose=1
    ),

    tf.keras.callbacks.TensorBoard(
        log_dir='./logs',
        histogram_freq=0,  # How often to log histogram visualizations
        embeddings_freq=0,  # How often to log embedding visualizations
        update_freq=32
    )
]

# bert_path, max_seq_length and the TF session `sess` are assumed to be
# defined earlier in the file.
model = architecture.build_model(bert_path, max_seq_length)
print(model.summary())

# Instantiate variables
initialize_vars(sess)

model.fit(
    train_X, train_y,
    validation_data=(test_X, test_y),
    epochs=1,
    callbacks=callbacks,
    batch_size=32
)

model.save('BertModel.h5')
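
# Hedged note: restoring this HDF5 file later typically requires passing the
# custom BERT layer class via custom_objects; the name 'BertLayer' below is
# hypothetical:
#   model = tf.keras.models.load_model('BertModel.h5',
#                                      custom_objects={'BertLayer': BertLayer})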