def test_get_my_doc_meta_with_category(client, admin, monkeypatch):
    """Dashboard filtered by category paginates and orders documents."""
    login(client, admin)
    # Shrink the page size so pagination kicks in with a small fixture set.
    monkeypatch.setattr('app.task.views.DOCUMENT_PER_PAGE', 3)

    with captured_templates(client.application) as templates:
        # First page of FLIP documents.
        resp = client.get(
            url_for('task.my_doc_meta', category=Category.FLIP.value))
        assert resp.status_code == 200
        template, context = templates.pop()
        assert template.name == 'task/document_dashboard.html'
        assert context['current_category'] == Category.FLIP.value
        expected = DocumentMeta.objects(
            create_by=admin, category=Category.FLIP.value).order_by(
                '-priority', '-update_at').all()[:3]
        assert set(get_ids(context['documents'].items)) == set(
            get_ids(expected))

        # Second page of SHORT_TERM documents.
        resp = client.get(
            url_for('task.my_doc_meta',
                    category=Category.SHORT_TERM.value,
                    page=2))
        assert resp.status_code == 200
        template, context = templates.pop()
        assert template.name == 'task/document_dashboard.html'
        assert context['current_category'] == Category.SHORT_TERM.value
        expected = DocumentMeta.objects(
            create_by=admin, category=Category.SHORT_TERM.value).order_by(
                '-priority', '-update_at').all()[3:6]
        assert set(get_ids(context['documents'].items)) == set(
            get_ids(expected))
def test_get_my_doc_meta_with_search(client, admin, monkeypatch):
    """An empty search string must list every document (first page)."""
    login(client, admin)
    # Shrink the page size so only the first three documents are listed.
    monkeypatch.setattr('app.task.views.DOCUMENT_PER_PAGE', 3)

    with captured_templates(client.application) as templates:
        # search empty string to return all documents
        resp = client.get(url_for('task.my_doc_meta', search=''))
        assert resp.status_code == 200
        template, context = templates.pop()
        assert template.name == 'task/document_dashboard.html'
        assert context['current_search'] == ''
        expected = DocumentMeta.objects(create_by=admin).order_by(
            '-priority', '-update_at').all()[:3]
        assert set(get_ids(context['documents'].items)) == set(
            get_ids(expected))
Ejemplo n.º 3
0
def evaluate_sents(data_source, uids, batch_size=10):
    """Evaluate the language model and collect per-sentence losses.

    :param data_source: token tensor laid out for BPTT evaluation batching
    :param uids: sentence-id tensor aligned token-for-token with data_source
    :param batch_size: evaluation batch width (hidden-state size)
    :return: (average loss per token over the whole source,
              {sentence uid: mean per-word loss})
    """
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    sent_loss = defaultdict(list)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        # BUG FIX: removed leftover debug code that re-loaded 'test.pickle'
        # on every iteration (result unused, file handle leaked, and the
        # loop crashed whenever the file was absent).
        data, targets = get_batch(data_source, i, args, evaluation=True)
        batch_uids = get_ids(uids, i, args, evaluation=True)
        output, hidden = model(data, hidden, decode=True)
        output_flat = output.view(-1, ntokens)
        per_word_loss = criterion(output_flat, targets)
        # Accumulate each word's loss under the id of its sentence.
        for loss, uid in zip(per_word_loss.tolist(),
                             batch_uids.reshape(-1).tolist()):
            sent_loss[uid].append(loss)
        total_loss += torch.mean(per_word_loss).item() * len(data)
        # Detach the hidden state so the graph does not grow across batches.
        hidden = repackage_hidden(hidden)
    avg_sent_loss = {uid: float(np.mean(losses))
                     for uid, losses in sent_loss.items()}
    return total_loss / len(data_source), avg_sent_loss
Ejemplo n.º 4
0
def get_result(net, gpu=False):
    """Evaluate ``net`` on the full image set and print its Dice coefficient."""
    image_ids = get_ids(dir_img)
    dataset = get_imgs_and_masks(image_ids, dir_img, dir_mask, 1.0)
    dice = eval_net(net, dataset, gpu)
    print('Validation Dice Coeff: {}'.format(dice))
Ejemplo n.º 5
0
 def send_reqs(self):
     """Send one connect request per stored profile id, throttled to one
     request every 180 seconds."""
     # NOTE(review): ids presumably come from a local store managed by
     # utils — confirm against utils.get_ids / utils.update_ids.
     lines = utils.get_ids()
     for profile_id in lines:
         print('[] Send request to {}'.format(profile_id))
         req = utils.set_request(profile_id=profile_id)
         # Response is intentionally ignored (best-effort fire-and-forget).
         r = requests.post(config.urls['base'] + config.urls['connect'],
                           headers=req['headers'], data=req['body'])
         print('\t-> Done')
         # Mark this id as processed before sleeping until the next round.
         utils.update_ids(profile_id=profile_id)
         print("\t-> Next round : {}\n".format(
             utils.get_next_round(round_duration=180)))
         # Throttle: the 180 s here must stay in sync with round_duration above.
         time.sleep(180)
Ejemplo n.º 6
0
def relevant_full_corpus(kwLimit):
    """Extract the relevant keywords of the full cybergeo corpus and store
    keyword scores plus the co-occurrence edge list in MongoDB.

    :param kwLimit: maximum number of keywords kept by the extraction;
        also used as a suffix for the destination collection names.
    """
    #corpus = utils.get_data('SELECT id FROM refdesc WHERE abstract_keywords IS NOT NULL;','../../Data/dumps/20160224_cybergeo.sqlite3')
    corpus = utils.get_ids('cybergeo','keywords')
    occurence_dicos = utils.import_kw_dico('cybergeo','keywords')
    mongo = pymongo.MongoClient('localhost',27017)
    database = mongo['relevant']
    relevant = 'relevant_full_'+str(kwLimit)
    network = 'network_full_'+str(kwLimit)+'_eth10'
    # Wipe previous results before re-inserting; index speeds keyword lookups.
    database[relevant].delete_many({"cumtermhood":{"$gt":0}})
    database[relevant].create_index('keyword')
    # keywords: {kw: termhood}, frequencies: {kw: corpus frequency} —
    # presumably; confirm against kwFunctions.extract_relevant_keywords.
    [keywords,dico,frequencies,edge_list] = kwFunctions.extract_relevant_keywords(corpus,kwLimit,occurence_dicos)
    print('insert relevant...')
    for kw in keywords.keys():
        # tf-idf-like score: log(termhood) * log(corpus size / frequency).
        butils.update_kw_tm(kw,keywords[kw],frequencies[kw],math.log(keywords[kw])*math.log(len(corpus)/frequencies[kw]),database,relevant)
    print('insert edges...')
    database[network].delete_many({"weight":{"$gt":0}})
    database[network].insert_many(edge_list)
Ejemplo n.º 7
0
    def _init_data(self):
        """Discover images, infer the dataset flavour, and build label indexes.

        Sets: ``dataset``, ``imgs``, ``cam_ids``, ``labels``, ``frames``,
        ``num_cams``, ``classes``, ``class_to_idx``, ``targets``.

        :raises ValueError: if the data directory name matches neither
            'market' nor 'duke'.
        """
        # Infer which re-id benchmark this directory holds from its path.
        dir_name = str(self.data_dir).lower()
        if 'market' in dir_name:
            self.dataset = 'market'
        elif 'duke' in dir_name:
            self.dataset = 'duke'
        else:
            # BUG FIX: previously fell through with self.dataset unset and
            # crashed later with an opaque AttributeError; fail fast instead.
            raise ValueError(
                'Cannot infer dataset from data_dir: {}'.format(self.data_dir))

        self.imgs = list(self.data_dir.glob('*.jpg'))
        # Filter out labels with -1 (junk/distractor images).
        self.imgs = [img for img in self.imgs if '-1' not in img.stem]

        self.cam_ids, self.labels, self.frames = get_ids(
            self.imgs, self.dataset)

        self.num_cams = len(set(self.cam_ids))
        # NOTE(review): set iteration order is not stable across processes,
        # so the label -> idx mapping below is not reproducible between runs.
        self.classes = tuple(set(self.labels))

        # Convert labels to continuous idxs
        self.class_to_idx = {label: i for i, label in enumerate(self.classes)}
        self.targets = [self.class_to_idx[label] for label in self.labels]
Ejemplo n.º 8
0
def relevant_full_corpus(kwLimit):
    """Extract the relevant keywords of the full cybergeo corpus and store
    keyword scores plus the co-occurrence edge list in MongoDB."""
    corpus = utils.get_ids('cybergeo', 'keywords')
    occurence_dicos = utils.import_kw_dico('cybergeo', 'keywords')

    client = pymongo.MongoClient('localhost', 27017)
    db = client['relevant']
    relevant_coll = 'relevant_full_' + str(kwLimit)
    network_coll = 'network_full_' + str(kwLimit) + '_eth10'

    # Wipe previous keyword results and make keyword lookups fast.
    db[relevant_coll].delete_many({"cumtermhood": {"$gt": 0}})
    db[relevant_coll].create_index('keyword')

    keywords, dico, frequencies, edge_list = \
        kwFunctions.extract_relevant_keywords(corpus, kwLimit,
                                              occurence_dicos)

    print('insert relevant...')
    corpus_size = len(corpus)
    for kw in keywords.keys():
        # tf-idf-like score: log(termhood) * log(corpus size / frequency).
        score = math.log(keywords[kw]) * math.log(
            corpus_size / frequencies[kw])
        butils.update_kw_tm(kw, keywords[kw], frequencies[kw], score,
                            db, relevant_coll)

    print('insert edges...')
    db[network_coll].delete_many({"weight": {"$gt": 0}})
    db[network_coll].insert_many(edge_list)
Ejemplo n.º 9
0
                       for player in sorted(players)]
        print '%s %s' % (score, salary)
        print '\t%s' % (', '.join(player_strs[:4]))
        print '\t%s' % (', '.join(player_strs[-4:]))

if __name__=='__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--salaries', type=str)
    parser.add_argument('-p', '--prefill', type=str,
                        help=('Quoted list of comma-separated player names'
                              ' (e.g. "Stephen Curry, James Harden")'))
    parser.add_argument('-i', '--ignore', type=str,
                        help=('Quoted list of comma-separated player names'
                              ' (e.g. "Stephen Curry, James Harden")'))
    parser.add_argument('-m', '--min_score', type=float)
    args = parser.parse_args()

    if args.salaries:
        print 'Loading score data...'
        scores = get_scores_with_freq(get_ids(limit=7), min_games=3)
        salaries = load_salaries(args.salaries)
        prefill_players = ([x.strip() for x in args.prefill.split(',')]
                           if args.prefill else [])
        ignore_players = ([x.strip() for x in args.ignore.split(',')]
                          if args.ignore else [])
        min_score = args.min_score if args.min_score != None else -1
        generate(scores, salaries, prefill_players=prefill_players,
                 ignore_players=ignore_players, min_score=min_score)
    else:
        print 'Unable to run script: requires a --salaries argument'
Ejemplo n.º 10
0
    parser.add_argument('-s', '--salaries', type=str)
    parser.add_argument('-p',
                        '--prefill',
                        type=str,
                        help=('Quoted list of comma-separated player names'
                              ' (e.g. "Stephen Curry, James Harden")'))
    parser.add_argument('-i',
                        '--ignore',
                        type=str,
                        help=('Quoted list of comma-separated player names'
                              ' (e.g. "Stephen Curry, James Harden")'))
    parser.add_argument('-m', '--min_score', type=float)
    args = parser.parse_args()

    if args.salaries:
        print 'Loading score data...'
        scores = get_scores_with_freq(get_ids(limit=7), min_games=3)
        salaries = load_salaries(args.salaries)
        prefill_players = ([x.strip() for x in args.prefill.split(',')]
                           if args.prefill else [])
        ignore_players = ([x.strip() for x in args.ignore.split(',')]
                          if args.ignore else [])
        min_score = args.min_score if args.min_score != None else -1
        generate(scores,
                 salaries,
                 prefill_players=prefill_players,
                 ignore_players=ignore_players,
                 min_score=min_score)
    else:
        print 'Unable to run script: requires a --salaries argument'
Ejemplo n.º 11
0
                "taylor_batches": args.taylor_batches,
                "prune_channels": args.prune_channels,
                "gpu": args.gpu,
                "load": args.load,
                "channel_txt": args.channel_txt,
                "scale": args.scale,
                "lr": args.lr,
                "iters": args.iters,
                "epochs": args.epochs
            },
            indent=4,
            sort_keys=True)))

    # Dataset
    if not os.path.exists(splitfile):  # Our constant datasplit
        ids = get_ids(dir_img)  # [file1, file2]
        ids = split_ids(ids)  # [(file1, 0), (file1, 1), (file2, 0), ...]
        iddataset = split_train_val(ids, 0.2, splitfile)
        log.info("New split dataset")

    else:
        with open(splitfile) as f:
            iddataset = json.load(f)
        log.info("Load split dataset")

    train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask,
                               args.scale)
    val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, args.scale)

    # Model Initialization
    net = UNet(n_channels=3, n_classes=1, f_channels=args.channel_txt)
def train_net(net,
              epochs=5,
              batch_size=1,
              lr=0.1,
              val_percent=0.2,
              save_cp=True,
              gpu=False,
              img_scale=0.5):
    """Train a binary-segmentation net with SGD and BCE loss.

    :param net: model producing per-pixel probabilities in [0, 1]
    :param epochs: number of passes over the training split
    :param batch_size: images per SGD step
    :param lr: SGD learning rate
    :param val_percent: fraction of ids held out for validation
    :param save_cp: save a checkpoint into ``sdgcheck/`` after every epoch
    :param gpu: move batches to CUDA before the forward pass
    :param img_scale: rescale factor applied when loading images/masks
    """
    path = [['data/ori1/', 'data/gt1/'],
            ['data/original1/', 'data/ground_truth1/'],
            ['data/Original/', 'data/Ground_Truth/']]
    dir_img = path[0][0]
    dir_mask = path[0][1]
    dir_checkpoint = 'sdgcheck/'

    ids = get_ids(dir_img)
    ids = split_ids(ids)

    iddataset = split_train_val(ids, val_percent)

    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
        CUDA: {}
    '''.format(epochs, batch_size, lr, len(iddataset['train']),
               len(iddataset['val']), str(save_cp), str(gpu)))

    N_train = len(iddataset['train'])

    optimizer = optim.SGD(net.parameters(),
                          lr=lr,
                          momentum=0.7,
                          weight_decay=0.005)
    criterion = nn.BCELoss()

    for epoch in range(epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        net.train()

        # Reset the generators: they are exhausted after each epoch.
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask,
                                   img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask,
                                 img_scale)

        epoch_loss = 0
        n_batches = 0
        for i, b in enumerate(batch(train, batch_size)):
            # Renamed comprehension variable: it previously shadowed the
            # enumerate index `i`.
            imgs = np.array([sample[0] for sample in b]).astype(np.float32)
            true_masks = np.array([sample[1] for sample in b])

            imgs = torch.from_numpy(imgs)
            true_masks = torch.from_numpy(true_masks)

            if gpu:
                imgs = imgs.cuda()
                true_masks = true_masks.cuda()

            masks_pred = net(imgs)
            masks_probs_flat = masks_pred.view(-1)
            true_masks_flat = true_masks.view(-1)

            loss = criterion(masks_probs_flat, true_masks_flat)
            epoch_loss += loss.item()
            n_batches += 1

            print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train,
                                                     loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # BUG FIX: the epoch average previously divided by the last batch
        # *index* `i` (off by one, and a NameError on an empty split);
        # divide by the actual number of batches instead.
        print('Epoch finished ! Loss: {}'.format(
            epoch_loss / max(n_batches, 1)))

        val_dice = eval_net(net, val, gpu)
        print('Validation Dice Coeff: {}'.format(val_dice))

        if save_cp:
            torch.save(net.state_dict(),
                       dir_checkpoint + 'CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
Ejemplo n.º 13
0
def train_net(net,
              epochs=5,
              batch_size=1,
              lr=0.003,
              val_percent=0.20,
              loss_lambda=5,
              save_cp=True,
              gpu=False,
              img_scale=0.5,
              expositions_num=15,
              logg_freq=15,
              tb=False,
              w_decay=0.0005,
              use_notifications=False,
              polyaxon=False,
              outputs_path='checkpoints'):
    """Train an ExpandNet-style HDR reconstruction network.

    Locates the dataset (locally or on Polyaxon), trains with Adagrad and
    ``ExpandNetLoss``, evaluates PSNR-HVS metrics each epoch, and keeps the
    checkpoint with the best PSNR-HVS-M score.

    :param net: network to train (must accept a batch of LDR images)
    :param epochs: number of training epochs
    :param batch_size: samples per optimizer step
    :param lr: Adagrad learning rate
    :param val_percent: fraction of ids used for validation
    :param loss_lambda: lambda passed to ExpandNetLoss
    :param save_cp: save 'BestCP.pth' whenever val PSNR-HVS-M improves
    :param gpu: move batches to CUDA
    :param img_scale: unused here — kept for interface compatibility
    :param expositions_num: number of exposures per source image
    :param logg_freq: log the running loss every N steps
    :param tb: enable TensorBoard logging via the module-level ``writer``
    :param w_decay: Adagrad weight decay
    :param use_notifications: push a Pushbullet note when training ends
    :param polyaxon: resolve data/output paths through Polyaxon helpers
    :param outputs_path: local checkpoint directory (non-Polyaxon runs)
    """

    # === Localize training data ===================================================
    if polyaxon:
        data_paths = get_data_paths()
        dir_checkpoints = get_outputs_path()
        dataSets_dir = os.path.join(data_paths['data1'], 'eprado',
                                    'USLDR-DataSet')
        #dataSets_dir = os.path.join(data_paths['data1'] , 'eprado', 'LDR_DataSet')

    else:
        dataSets_dir = os.path.join(wk_dir, "LDR_DataSet")
        dir_checkpoints = os.path.join(wk_dir, outputs_path)
    print('Dataset_dir', dataSets_dir)
    print('Outputs_path', dir_checkpoints)
    experiment_id = datetime.datetime.now().strftime('%d%m_%H%M_')
    experiment_name = 'ExpandnetL_psn_{}_bs{}_lr{}_exps{}'.format(
        experiment_id, batch_size, lr, expositions_num)
    dir_img = os.path.join(dataSets_dir, 'Org_images/')
    dir_compressions = os.path.join(dataSets_dir, 'c_images/')
    dir_mask = os.path.join(dataSets_dir, 'c_images/')

    #if tb:
    #dummy_input = torch.rand(1, 3, 128, 128)
    #writer.add_graph(net, (dummy_input,))
    #writer.close()
    # === Load Training/Validation data =====================================================
    ids = get_ids(dir_compressions)
    # Split into train test
    idsset = list(ids)

    # NOTE(review): `kf` is defined but never used below — the actual split
    # comes from split_train_val; the KFold loop is commented out.
    kf = KFold(n_splits=5, shuffle=False)
    #print('Train splits: ',kf.get_n_splits(dataset))

    best_psnr_m = 0
    best_psnr_hvs = 0
    #for train_index, test_index in kf.split(idsset):
    iddataset = split_train_val(idsset, expositions_num, val_percent)
    #test_set = []
    #for im_id in test_index:
    #    for e in range(expositions_num):
    #        test_set.append(idsset[im_id])

    N_train = len(iddataset['train'])
    N_val = len(iddataset['val'])
    N_test = 0  #len(test_set)

    #=====CHOOSE Loss Criterion=============================================================
    #criterion = nn.MSELoss(reduction='mean')
    criterion = ExpandNetLoss(loss_lambda=loss_lambda)
    optimizer = optim.Adagrad(net.parameters(),
                              lr=lr,
                              lr_decay=0.000001,
                              weight_decay=w_decay)
    #optimizer = optim.SGD(net.parameters(),
    #   lr=lr,
    #   momentum=0.9,
    #   weight_decay=0.0005)

    since = time.time()
    print('''
        Training SETUP:
        Epochs: {0:}
        Batch size: {1:}
        Optimizer: Adagrad
        Learning rate: {2:}
        Weight decay: {3:}
        Training size: {4:}
        Validation size: {5:}
        Test size: {6:}
        Checkpoints: {7:}
        CUDA: {8:}
        '''.format(epochs, batch_size, lr, w_decay, N_train, N_val, N_test,
                   str(save_cp), str(gpu)))

    train_dataset = HdrDataset(iddataset['train'], dir_compressions, dir_mask,
                               expositions_num)
    val_dataset = HdrDataset(iddataset['val'], dir_compressions, dir_mask,
                             expositions_num)
    #test_dataset = HdrDataset(test_set, dir_compressions, dir_mask,expositions_num)

    train_data_loader = DataLoader(train_dataset,
                                   batch_size=batch_size,
                                   shuffle=True,
                                   drop_last=False)
    val_data_loader = DataLoader(val_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 drop_last=False)
    #test_data_loader = DataLoader(test_dataset,batch_size=batch_size,shuffle=True)
    best_hvsm = 0.0
    global_psnr_m = []
    global_psnr_hvs = []
    for epoch in range(epochs):
        print('\n')
        print('{}{}{}'.format('+', '=' * 78, '+'))
        print('| Starting epoch {}/{}. {}'.format(epoch + 1, epochs,
                                                  (' ' * 57) + '|'))
        print('{}{}{}'.format('|', '-' * 78, '|'))
        begin_of_epoch = time.time()
        tot_steps = math.trunc(N_train / batch_size)
        net.train()
        train_loss = 0
        losses = []
        val_loss = 0
        step = 0
        train_sample = []
        train_acc = 0
        val_hvsm = 0
        val_hvs = 0
        # NOTE(review): model_pnsr_m is assigned but never used below.
        model_pnsr_m = 0

        for i, b in enumerate(train_data_loader):
            step += 1
            imgs, true_masks, imgs_ids = b['input'], b['target'], b['id']
            #print(i, b['input'].size(), b['target'].size())
            #input: [15, 3, 224, 224]), target: [15, 3, 224, 224]
            #print('>>>>>>> Input max: ' , torch.max(imgs[0]))
            #print('>>>>>>> mask max : ', torch.max(true_masks[0]))

            if gpu:
                imgs = imgs.cuda()
                true_masks = true_masks.cuda()
            else:
                print(' GPU not available')

            # Predicted mask images
            optimizer.zero_grad()
            prediction = net(imgs)  #prediction shape: [B, 3, 224, 224]
            #cost, cost_input_output = Hdr_loss(imgs, true_masks, prediction, sep_loss=False, gpu=gpu, tb=tb)
            cost = criterion(prediction, true_masks)
            #loss is torch tensor
            losses.append(cost.item())

            # Running mean of all step losses seen so far this epoch.
            train_loss = np.mean(losses)
            cost.backward()
            optimizer.step()

            if step == 1 or step % logg_freq == 0:
                #print('| Step: {0:}, cost:{1:}, Train Loss:{2:.9f}, Train Acc:{3:.9f}'.format(step,cost, train_loss,train_acc/step))
                print('| Step: {0:}, cost:{1:}, Train Loss:{2:.9f}'.format(
                    step, cost, train_loss))

            #Last Step of this Epoch
            if step == math.trunc(tot_steps):
                # Stash one random (input, target, prediction) triple from the
                # final batch so it can be saved as a visual progress sample.
                num_in_batch = random.randrange(imgs.size(0))
                train_sample_name = imgs_ids[num_in_batch]
                train_sample = [
                    imgs[num_in_batch], true_masks[num_in_batch],
                    prediction[num_in_batch]
                ]

                t_exp_name = 'Train_' + experiment_name
                saveTocheckpoint(dir_checkpoints, t_exp_name,
                                 train_sample_name, epoch, train_sample[0],
                                 train_sample[1], train_sample[2])

                if tb:
                    print(
                        '| saving train step {0:} sample : input,target & pred'
                        .format(step))
                    grid = torchvision.utils.make_grid(train_sample, nrow=3)
                    writer.add_image('train_sample', grid, 0)

        #if  epoch == 1 or epoch % 15 == 0 or epoch == epochs:

        val_loss, val_hvsm, val_hvs = eval_hdr_net(net,
                                                   dir_checkpoints,
                                                   experiment_name,
                                                   val_data_loader,
                                                   criterion,
                                                   epoch,
                                                   gpu,
                                                   batch_size,
                                                   expositions_num=15,
                                                   tb=tb)
        if tb:
            writer.add_scalar('training_loss: ', train_loss, epoch)
            writer.add_scalar('validation_loss', val_loss, epoch)
            writer.add_scalar('val_hvsm', val_hvsm, epoch)
            writer.add_scalar('val_hvs', val_hvs, epoch)
            writer.add_scalars('losses', {
                'training_loss': train_loss,
                'val_loss': val_loss
            }, epoch)
            if polyaxon:
                experiment.log_metrics(step=epoch,
                                       training_loss=train_loss,
                                       validation_loss=val_loss,
                                       val_hvsm=val_hvsm,
                                       val_hvs=val_hvs)

        print('{}{}{}'.format('+', '=' * 78, '+'))
        print('| {0:} Epoch {1:} finished ! {2:}|'.format(
            ' ' * 28, (epoch + 1), ' ' * 29))
        print('{}{}{}'.format('+', '-' * 78, '+'))
        print('| Summary: Train Loss: {0:0.07}, Val Loss:{1:}'.format(
            train_loss, val_loss))
        print('|          Avrg psnr-hvs_m :{0:0.04},Avrg psnr-hvs :{1:0.04}'.
              format(val_hvsm, val_hvs))
        time_epoch = time.time() - begin_of_epoch
        print('| Epoch ETC: {:.0f}m {:.0f}s'.format(time_epoch // 60,
                                                    time_epoch % 60))
        print('{}{}{}'.format('+', '=' * 78, '+'))

        # Keep only the best model by validation PSNR-HVS-M.
        if save_cp and (val_hvsm > best_hvsm):
            best_hvsm = val_hvsm
            model_path = os.path.join(dir_checkpoints, 'BestCP.pth')
            torch.save(net.state_dict(), model_path)
            print('Checkpoint saved !')
        global_psnr_hvs.append(val_hvs)
        global_psnr_m.append(val_hvsm)
    '''
    test_psnr_m, test_psnr_hvs = test_hdr_net(model_path,dir_checkpoints,
                                                experiment_name,
                                                test_data_loader,
                                                criterion,gpu,tb)
                                                if save_cp and (test_psnr_m > best_psnr_m):
    best_psnr_m = test_psnr_m
    best_model_path = os.path.join(dir_checkpoints, 'Best_CP.pth')
    torch.save(net.state_dict(),best_model_path)
    print('Best model saved !')
    '''
    print('>' * 80)
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Final Average psnr-hvs_m: {:.0f}, psnr-hvs: {:.0f}'.format(
        np.mean(global_psnr_m), np.mean(global_psnr_hvs)))
    if tb:
        writer.close()
    if use_notifications:
        end_msg = "train.py finished at: {}(".format(
            str(datetime.datetime.now()))
        push = pb.push_note("usHDR: Finish", end_msg)
Ejemplo n.º 14
0
def main():
    """Run GCN-based classification on a population graph (MDD vs HC).

    Parses hyperparameters from the command line, loads subject labels and
    connectivity features, builds an affinity graph from phenotypic scores,
    and either runs 10-fold cross-validation in parallel (--folds 11) or
    evaluates a single fold.
    """
    parser = argparse.ArgumentParser(
        description='Graph CNNs for population graphs: '
        'classification of the ABIDE dataset')
    parser.add_argument(
        '--dropout',
        default=0.3,
        type=float,
        help='Dropout rate (1 - keep probability) (default: 0.3)')
    parser.add_argument(
        '--decay',
        default=5e-4,
        type=float,
        help='Weight for L2 loss on embedding matrix (default: 5e-4)')
    parser.add_argument(
        '--hidden1',
        default=32,
        type=int,
        help='Number of filters in hidden layers (default: 16)')
    # parser.add_argument('--lrate', default=0.005, type=float, help='Initial learning rate (default: 0.005)')
    parser.add_argument('--lrate',
                        default=1e-2,
                        type=float,
                        help='Initial learning rate (default: 0.005)')
    # parser.add_argument('--atlas', default='ho', help='atlas for network construction (node definition) (default: ho, '
    #                                                   'see preprocessed-connectomes-project.org/abide/Pipelines.html '
    #                                                   'for more options )')
    parser.add_argument('--epochs',
                        default=100,
                        type=int,
                        help='Number of epochs to train')
    parser.add_argument('--num_features',
                        default=2000,
                        type=int,
                        help='Number of features to keep for '
                        'the feature selection step (default: 2000)')
    parser.add_argument('--num_training',
                        default=1.0,
                        type=float,
                        help='Percentage of training set used for '
                        'training (default: 1.0)')
    parser.add_argument('--depth',
                        default=0,
                        type=int,
                        help='Number of additional hidden layers in the GCN. '
                        'Total number of hidden layers: 1+depth (default: 0)')
    parser.add_argument('--model',
                        default='gcn_cheby',
                        help='gcn model used (default: gcn_cheby, '
                        'uses chebyshev polynomials, '
                        'options: gcn, gcn_cheby, dense )')
    # parser.add_argument('--seed', default=89, type=int, help='Seed for random initialisation (default: 123)')
    parser.add_argument(
        '--folds',
        default=11,
        type=int,
        help='For cross validation, specifies which fold will be '
        'used. All folds are used if set to 11 (default: 11)')
    parser.add_argument(
        '--save',
        default=200,
        type=int,
        help='Parameter that specifies if results have to be saved. '
        'Results will be saved if set to 1 (default: 1)')
    parser.add_argument('--connectivity',
                        default='correlation',
                        help='Type of connectivity used for network '
                        'construction (default: correlation, '
                        'options: correlation, partial correlation, '
                        'tangent)')
    parser.add_argument('--train', default=1, type=int)

    args = parser.parse_args()
    start_time = time.time()

    # GCN Parameters
    params = dict()
    params['model'] = args.model  # gcn model using chebyshev polynomials
    params['lrate'] = args.lrate  # Initial learning rate
    params['epochs'] = args.epochs  # Number of epochs to train
    params['dropout'] = args.dropout  # Dropout rate (1 - keep probability)
    params['hidden1'] = args.hidden1  # Number of units in hidden layers
    params['decay'] = args.decay  # Weight for L2 loss on embedding matrix
    params['early_stopping'] = params[
        'epochs']  # Tolerance for early stopping (# of epochs). No early stopping if set to param.epochs
    params['max_degree'] = 3  # Maximum Chebyshev polynomial degree.
    params[
        'depth'] = args.depth  # number of additional hidden layers in the GCN. Total number of hidden layers: 1+depth
    # params['seed'] = args.seed                      # seed for random initialisation

    # GCN Parameters
    params[
        'num_features'] = args.num_features  # number of features for feature selection step
    params[
        'num_training'] = args.num_training  # percentage of training set used for training
    params[
        'train'] = args.train  # percentage of training set used for training
    # atlas = args.atlas                              # atlas for network construction (node definition)
    # connectivity = args.connectivity                # type of connectivity used for network construction

    # Get class labels
    # subject_IDs = Reader.get_ids()
    ##################################################################
    # NOTE(review): get_ids here also returns a shuffle permutation, which
    # is applied to the feature matrix further below — confirm labels are
    # returned in the same shuffled order, otherwise rows and labels drift.
    subject_IDs, shuffled_indices = Reader.get_ids()
    ##################################################################

    labels = Reader.get_labels(subject_IDs, score='DX_Group')  # labels

    # Get acquisition site
    # ####### sites = Reader.get_subject_score(subject_IDs, score='SITE_ID')
    ########## unique = np.unique(list(sites.values())).tolist()

    num_classes = 2  # MDD or HC
    num_nodes = len(subject_IDs)

    # Initialise variables for class labels and acquisition sites
    y_data = np.zeros([num_nodes, num_classes])
    y = np.zeros([num_nodes, 1])
    ########## site = np.zeros([num_nodes, 1], dtype=np.int)

    # Get class labels and acquisition site for all subjects
    for i in range(num_nodes):
        # One-hot row per subject; raw labels are 1-based.
        y_data[i, int(labels[subject_IDs[i]]) - 1] = 1
        y[i] = int(labels[subject_IDs[i]])
        ########## site[i] = unique.index(sites[subject_IDs[i]])

    import pickle
    # with open('./label.pkl', 'wb') as filehandle:
    #     pickle.dump(np.argmax(y_data, axis=1), filehandle)

    # Compute feature vectors (vectorised connectivity networks)
    ####### Granger Causality Analysis
    # data_fld = './granger_casuality'
    # features = Reader.load_ec_GCA(subject_IDs, data_fld)
    #######

    features = Reader.get_networks(subject_IDs,
                                   variable='correlation',
                                   isDynamic=False,
                                   isEffective=True)
    ############################################################
    # Apply the same shuffle to the feature rows as to the subject ids.
    shuffled_features = features[shuffled_indices]
    features = shuffled_features.copy()
    ############################################################
    # features = Reader.get_networks(subject_IDs, variable='graph_measure', isDynamic=True)

    # np.save('./MDD_dataset/features_GCA.npy', features)
    # np.save('./MDD_dataset/labels.npy', np.argmax(y_data, axis=1))

    # Compute population graph using gender and acquisition site
    graph = Reader.create_affinity_graph_from_scores(['Age', 'Sex'],
                                                     subject_IDs)
    # graph = Reader.create_affinity_graph_from_scores(['Sex'], subject_IDs)

    # Folds for cross validation experiments
    #num_samples = np.shape(features)[0]
    skf = StratifiedKFold(n_splits=10)
    #loo = LeaveOneOut()

    train_ind_set = []
    test_ind_set = []
    # Collect all stratified splits up front (reversed order is preserved
    # from the original implementation).
    for train_ind, test_ind in reversed(
            list(skf.split(np.zeros(num_nodes), np.squeeze(y)))):
        train_ind_set.append(train_ind)
        test_ind_set.append(test_ind)
    cur_time = time.time()

    # import pickle
    # with open('./MDD_dataset/train_ind.pkl', 'wb') as filehandle:
    #     pickle.dump(train_ind_set, filehandle)
    # with open('./MDD_dataset/test_ind.pkl', 'wb') as filehandle:
    #     pickle.dump(test_ind_set, filehandle)

    if args.folds == 11:  # run cross validation on all folds
        # One parallel job per fold; test set doubles as validation set.
        scores = Parallel(n_jobs=10)(delayed(train_fold)(
            cv, train_ind, test_ind, test_ind, graph, features, y, y_data,
            params, subject_IDs, cur_time) for train_ind, test_ind, cv in zip(
                train_ind_set, test_ind_set, range(10)))

        test_auc = [x[0] for x in scores]
        test_accuracy = [x[1] for x in scores]
        test_sensitivity = [x[2] for x in scores]
        test_specificity = [x[3] for x in scores]
        test_pred = [x[4] for x in scores]
        test_lab = [x[5] for x in scores]

        print('Accuracy : ' + str(np.mean(test_accuracy)) + ' + ' +
              str(np.std(test_accuracy)))
        print('Sensitivity : ' + str(np.mean(test_sensitivity)) + ' + ' +
              str(np.std(test_sensitivity)))
        print('Specificity : ' + str(np.mean(test_specificity)) + ' + ' +
              str(np.std(test_specificity)))
        print('AUC : ' + str(np.mean(test_auc)) + ' + ' +
              str(np.std(test_auc)))

        # np.savez('./statistical_test/FC_Lasso_MLP_pred.npz', pred=test_pred, allow_pickle=True)
        # np.savez('./statistical_test/FC_Lasso_MLP_lab.npz', lab=test_lab, allow_pickle=True)

    else:  # compute results for only one fold

        cv_splits = list(skf.split(features, np.squeeze(y)))

        train = cv_splits[args.folds][0]
        test = cv_splits[args.folds][1]

        val = test

        scores_acc, scores_auc, scores_lin, scores_auc_lin, fold_size = train_fold(
            train, test, val, graph, features, y, y_data, params, subject_IDs,
            cur_time)

        print('overall linear accuracy %f' +
              str(np.sum(scores_lin) * 1. / fold_size))
        print('overall linear AUC %f' + str(np.mean(scores_auc_lin)))
        print('overall accuracy %f' + str(np.sum(scores_acc) * 1. / fold_size))
        print('overall AUC %f' + str(np.mean(scores_auc)))
Ejemplo n.º 15
0
def train(data_path, *, base_output_path="models", run_name=None,
          data_name=None, net_name="wave_net", clean=False, input_length=9,
          output_length=1, n_markers=60, stride=1, train_fraction=.85,
          val_fraction=0.15, only_moving_frames=False, n_filters=512,
          filter_width=2, layers_per_level=3, n_dilations=None,
          latent_dim=750, epochs=50, batch_size=1000,
          lossfunc='mean_squared_error', lr=1e-4, batches_per_epoch=0,
          val_batches_per_epoch=0, reduce_lr_factor=0.5, reduce_lr_patience=3,
          reduce_lr_min_delta=1e-5, reduce_lr_cooldown=0,
          reduce_lr_min_lr=1e-10, save_every_epoch=False):
    """Trains the network and saves the results to an output directory.

    :param data_path: Path to an HDF5 file with marker data.
    :param base_output_path: Path to folder in which the run data folder will
                             be saved
    :param run_name: Name of the training run. If not specified, will be
                     formatted according to other parameters.
    :param data_name: Name of the dataset for use in formatting run_name
    :param net_name: Name of the network for use in formatting run_name
    :param clean: If True, deletes the contents of the run output path
    :param input_length: Number of frames to input into model
    :param output_length: Number of frames model will attempt to predict
    :param n_markers: Number of markers to use
    :param stride: Downsampling rate of training set.
    :param train_fraction: Fraction of dataset to use as training
    :param val_fraction: Fraction of dataset to use as validation
    :param only_moving_frames: If True only use moving_frames.
    :param filter_width: Width of base convolution filter
    :param layers_per_level: Number of layers to use at each convolutional
                             block
    :param n_dilations: Number of dilations for wavenet filters.
                        (See models.wave_net)
    :param latent_dim: Number of latent dimensions (Currently just for LSTM)
    :param n_filters: Number of filters to use as baseline (see create_model)
    :param epochs: Number of epochs to train for
    :param batch_size: Number of samples per batch
    :param batches_per_epoch: Number of batches per epoch (validation is
                              evaluated at the end of the epoch)
    :param val_batches_per_epoch: Number of batches for validation
    :param reduce_lr_factor: Factor to reduce the learning rate by (see
                             ReduceLROnPlateau)
    :param reduce_lr_patience: How many epochs to wait before reduction (see
                               ReduceLROnPlateau)
    :param reduce_lr_min_delta: Minimum change in error required before
                                reducing LR (see ReduceLROnPlateau)
    :param reduce_lr_cooldown: How many epochs to wait after reduction before
                               LR can be reduced again (see ReduceLROnPlateau)
    :param reduce_lr_min_lr: Minimum that the LR can be reduced down to (see
                             ReduceLROnPlateau)
    :param save_every_epoch: Save weights at every epoch. If False, saves only
                             initial, final and best weights.
    """
    # Set the n_dilations param: default to floor(log2(input_length)) so the
    # receptive field of the dilated stack covers the whole input window.
    if n_dilations is None:
        n_dilations = np.int32(np.floor(np.log2(input_length)))
    else:
        n_dilations = int(n_dilations)

    # Load Data
    print('Loading Data')
    markers, marker_means, marker_stds, bad_frames, moving_frames = \
        load_dataset(data_path)
    moving_frames = np.squeeze(moving_frames > 0)
    if only_moving_frames:
        markers = markers[moving_frames, :]
        bad_frames = bad_frames[moving_frames, :]
    markers = markers[::stride, :]
    bad_frames = bad_frames[::stride, :]

    # Get Ids
    print('Getting indices')
    [input_ids, target_ids] = get_ids(bad_frames, input_length,
                                      output_length, True, True)

    # Get the training, testing, and validation trajectories by indexing into
    # the marker arrays. The test split (everything past train+val) is kept
    # for completeness but not used during training.
    n_train = np.int32(np.round(input_ids.shape[0]*train_fraction))
    n_val = np.int32(np.round(input_ids.shape[0]*val_fraction))
    X = markers[input_ids[:n_train, :], :]
    Y = markers[target_ids[:n_train, :], :]
    val_X = markers[input_ids[n_train:(n_train+n_val), :], :]
    val_Y = markers[target_ids[n_train:(n_train+n_val), :], :]
    test_X = markers[input_ids[(n_train+n_val):, :], :]
    test_Y = markers[target_ids[(n_train+n_val):, :], :]

    # Create network
    print('Compiling network')
    # BUG FIX: `model` was previously unbound when net_name matched none of
    # the branches below, so the `model is None` guard raised NameError
    # instead of printing the intended error message.
    model = None
    if isinstance(net_name, keras.models.Model):
        # A pre-built model may be passed directly in place of a name.
        model = net_name
        net_name = model.name
    elif net_name == 'wave_net':
        model = create_model(net_name, lossfunc=lossfunc, lr=lr,
                             input_length=input_length,
                             output_length=output_length, n_markers=n_markers,
                             n_filters=n_filters, filter_width=filter_width,
                             layers_per_level=layers_per_level,
                             n_dilations=n_dilations, print_summary=False)
    elif net_name == 'lstm_model':
        model = create_model(net_name, lossfunc=lossfunc, lr=lr,
                             input_length=input_length, n_markers=n_markers,
                             latent_dim=latent_dim, print_summary=False)
    elif net_name == 'wave_net_res_skip':
        model = create_model(net_name, lossfunc=lossfunc, lr=lr,
                             input_length=input_length, n_markers=n_markers,
                             n_filters=n_filters, filter_width=filter_width,
                             layers_per_level=layers_per_level,
                             n_dilations=n_dilations, print_summary=True)
    if model is None:
        print("Could not find model:", net_name)
        return

    # Build run name if needed
    if data_name is None:
        data_name = os.path.splitext(os.path.basename(data_path))[0]
    if run_name is None:
        run_name = "%s-%s_epochs=%d_input_%d_output_%d" \
            % (data_name, net_name, epochs, input_length, output_length)
    print("data_name:", data_name)
    print("run_name:", run_name)

    # Initialize run directories
    print('Building run folders')
    run_path = create_run_folders(run_name, base_path=base_output_path,
                                  clean=clean)

    # Save the training information in a mat file.
    print('Saving training info')
    savemat(os.path.join(run_path, "training_info.mat"),
            {"data_path": data_path, "base_output_path": base_output_path,
             "run_name": run_name, "data_name": data_name,
             "net_name": net_name, "clean": clean, "stride": stride,
             "input_length": input_length, "output_length": output_length,
             "n_filters": n_filters, "n_markers": n_markers, "epochs": epochs,
             "batch_size": batch_size, "train_fraction": train_fraction,
             "val_fraction": val_fraction,
             "only_moving_frames": only_moving_frames,
             "filter_width": filter_width,
             "layers_per_level": layers_per_level, "n_dilations": n_dilations,
             "batches_per_epoch": batches_per_epoch,
             "val_batches_per_epoch": val_batches_per_epoch,
             "reduce_lr_factor": reduce_lr_factor,
             "reduce_lr_patience": reduce_lr_patience,
             "reduce_lr_min_delta": reduce_lr_min_delta,
             "reduce_lr_cooldown": reduce_lr_cooldown,
             "reduce_lr_min_lr": reduce_lr_min_lr,
             "save_every_epoch": save_every_epoch})

    # Save initial network
    print('Saving initial network')
    model.save(os.path.join(run_path, "initial_model.h5"))

    # Initialize training callbacks
    history_callback = LossHistory(run_path=run_path)
    # NOTE: `epsilon` is the legacy Keras name for min_delta — matches the
    # Keras version this file targets; confirm before upgrading Keras.
    reduce_lr_callback = ReduceLROnPlateau(monitor="val_loss",
                                           factor=reduce_lr_factor,
                                           patience=reduce_lr_patience,
                                           verbose=1, mode="auto",
                                           epsilon=reduce_lr_min_delta,
                                           cooldown=reduce_lr_cooldown,
                                           min_lr=reduce_lr_min_lr)
    if save_every_epoch:
        save_string = "weights/weights.{epoch:03d}-{val_loss:.9f}.h5"
        checkpointer = ModelCheckpoint(filepath=os.path.join(run_path,
                                       save_string), verbose=1,
                                       save_best_only=False)
    else:
        checkpointer = ModelCheckpoint(filepath=os.path.join(run_path,
                                       "best_model.h5"), verbose=1,
                                       save_best_only=True)

    # Train!
    print('Training')
    t0_train = time()
    training = model.fit(X, Y, batch_size=batch_size, epochs=epochs,
                         verbose=1, validation_data=(val_X, val_Y),
                         callbacks=[history_callback, checkpointer,
                                    reduce_lr_callback])

    # Compute total elapsed time for training
    elapsed_train = time() - t0_train
    print("Total runtime: %.1f mins" % (elapsed_train / 60))

    # Save final model
    print('Saving final model')
    model.history = history_callback.history
    model.save(os.path.join(run_path, "final_model.h5"))
Ejemplo n.º 16
0
def train_net(
        net,
        epochs=5,
        batch_size=1,
        lr=0.1,
        val_percent=0.05,  # train/val split = 0.95 / 0.05
        save_cp=True,
        gpu=False,
        img_scale=0.5):
    """Train a binary-segmentation net with SGD and BCE loss.

    Image/mask/checkpoint folders are read from the module-level
    ``opt_train`` config object.

    :param net: network whose raw output is passed through a sigmoid
    :param epochs: number of passes over the training set
    :param batch_size: samples per gradient step
    :param lr: SGD learning rate
    :param val_percent: fraction of ids held out for validation
    :param save_cp: save a checkpoint after every epoch
    :param gpu: move tensors to CUDA when True
    :param img_scale: down-scaling factor applied when loading images
    """
    dir_img = opt_train.dir_img
    dir_mask = opt_train.dir_mask
    dir_checkpoint = opt_train.dir_checkpoint

    # List of image ids: file names without extension.
    ids = get_ids(dir_img)
    # Expand each id into (id, 0) and (id, 1); the trailing index later
    # selects the left (0) or right (1) crop of the image, doubling the data.
    ids = split_ids(ids)
    # Shuffle, then split the ids into train/val sets by val_percent.
    iddataset = split_train_val(ids, val_percent)

    print('''
    开始训练:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        训练集大小: {}
        验证集大小: {}
        GPU: {}
    '''.format(epochs, batch_size, lr, len(iddataset['train']),
               len(iddataset['val']), str(gpu)))

    # Number of training samples.
    N_train = len(iddataset['train'])

    optimizer = optim.SGD(net.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0005)

    # Binary cross-entropy on sigmoid probabilities.
    criterion = nn.BCELoss()

    for epoch in range(epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))

        # Re-create the data generators each epoch: images and their masks
        # receive the same preprocessing and are yielded together.
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask,
                                   img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask,
                                 img_scale)

        # Running sum of batch losses for this epoch.
        epoch_loss = 0
        num_batches = 0

        for i, b in enumerate(batch(train, batch_size)):
            # Each element of b is an (image, mask) pair; the 0/1 crop index
            # was introduced by split_ids above.
            imgs = np.array([sample[0] for sample in b]).astype(np.float32)
            true_masks = np.array([sample[1] for sample in b])

            # Convert to torch tensors.
            imgs = torch.from_numpy(imgs)
            true_masks = torch.from_numpy(true_masks)

            # Move the batch to the GPU if requested.
            if gpu:
                imgs = imgs.cuda()
                true_masks = true_masks.cuda()

            # Forward pass: predicted masks, e.g. [N, 1, 384, 384].
            masks_pred = net(imgs)
            # torch.sigmoid replaces the deprecated F.sigmoid.
            masks_probs = torch.sigmoid(masks_pred)
            masks_probs_flat = masks_probs.view(-1)

            true_masks_flat = true_masks.view(-1)
            # BCE between the flattened prediction and ground truth.
            loss = criterion(masks_probs_flat, true_masks_flat)
            epoch_loss += loss.item()
            num_batches = i + 1

            # Progress within the epoch and the current batch loss.
            print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train,
                                                     loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # BUG FIX: the mean loss used to divide by the last batch index `i`
        # (one less than the batch count, and unbound for an empty set);
        # divide by the actual batch count instead.
        print('Epoch finished ! Loss: {}'.format(
            epoch_loss / max(num_batches, 1)))

        # Evaluate on the validation set after every epoch with the Dice
        # coefficient (a set-similarity measure).
        if True:
            val_dice = eval_net(net, val, gpu)
            print('Validation Dice Coeff: {}'.format(val_dice))

        # Save a checkpoint for this epoch.
        if save_cp:
            torch.save(net.state_dict(),
                       dir_checkpoint + 'CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
Ejemplo n.º 17
0
def train_net(net,
              epochs=5,
              batch_size=1,
              lr=0.1,
              val_percent=0.05,
              save_cp=True,
              gpu=False,
              img_scale=0.5):
    """Train a segmentation network with SGD and BCE loss on the
    ``data/train`` image/mask folders, validating after every epoch.

    :param net: network to train; its output is fed straight to BCELoss
    :param epochs: number of passes over the training set
    :param batch_size: samples per gradient step
    :param lr: SGD learning rate
    :param val_percent: fraction of the ids held out for validation
    :param save_cp: if True, save a checkpoint after every epoch
    :param gpu: if True, move tensors to CUDA
    :param img_scale: image down-scaling factor applied when loading
    """

    dir_img = 'data/train/'  # folder with training images
    dir_mask = 'data/train_masks/'  # folder with ground-truth masks
    dir_checkpoint = 'checkpoints/'  # folder where trained checkpoints go

    ids = get_ids(dir_img)  # the last 4 chars of each file name are digits, used as the image id

    # Expand the ids into [(id1,0),(id1,1),(id2,0),(id2,1),...,(idn,0),(idn,1)].
    # The trailing 0/1 is later passed as the `pos` argument of get_square in
    # utils.py when the generators are rebuilt (pos=0 takes the left part of
    # the image, pos=1 the right part), which doubles the number of samples.
    ids = split_ids(ids)

    iddataset = split_train_val(ids, val_percent)  # split ids into train and validation sets

    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
        CUDA: {}
    '''.format(epochs, batch_size, lr, len(iddataset['train']),
               len(iddataset['val']), str(save_cp), str(gpu)))

    N_train = len(iddataset['train'])  # number of training samples

    optimizer = optim.SGD(
        net.parameters(),  # optimizer over all network parameters
        lr=lr,
        momentum=0.9,
        weight_decay=0.0005)

    criterion = nn.BCELoss()  # binary cross-entropy loss

    for epoch in range(epochs):  # main training loop
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        net.train()  # switch to training mode

        # Reset the generators: input images (dir_img) and mask images
        # (dir_mask) go through the same preprocessing (scale, crop,
        # transpose, normalise) and are yielded together as
        # (imgs_normalized, masks).
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask,
                                   img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask,
                                 img_scale)

        epoch_loss = 0

        for i, b in enumerate(batch(train, batch_size)):
            imgs = np.array([i[0] for i in b]).astype(np.float32)  # batch of input images
            true_masks = np.array([i[1] for i in b])  # batch of ground-truth masks

            imgs = torch.from_numpy(imgs)
            true_masks = torch.from_numpy(true_masks)

            if gpu:
                imgs = imgs.cuda()
                true_masks = true_masks.cuda()

            masks_pred = net(imgs)  # forward pass: predicted (greyscale) masks
            # NOTE(review): BCELoss expects probabilities in [0, 1]; this
            # assumes the net's final layer already applies a sigmoid — confirm.
            masks_probs_flat = masks_pred.view(-1)  # flatten the predictions

            true_masks_flat = true_masks.view(-1)

            loss = criterion(masks_probs_flat, true_masks_flat)  # BCE between the two flat vectors
            epoch_loss += loss.item()

            print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train,
                                                     loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # NOTE(review): divides by the last batch index i, not the batch
        # count (i + 1) — the reported mean loss is slightly high.
        print('Epoch finished ! Loss: {}'.format(epoch_loss /
                                                 i))  # mean loss over the epoch

        if 1:
            # Validate with the Dice coefficient, a set-similarity measure.
            val_dice = eval_net(net, val, gpu)
            print('Validation Dice Coeff: {}'.format(val_dice))

        if save_cp:
            torch.save(net.state_dict(),
                       dir_checkpoint + 'CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
Ejemplo n.º 18
0
        if score_tup[1] >= min_games
    }

def get_scores_with_freq(ids, min_games=0):
    """Return each player's weighted (score, games) tuple, keeping only
    players who appear in at least min_games games."""
    filtered = {}
    for player, score_tup in get_weighted_scores(ids).iteritems():
        if score_tup[1] >= min_games:
            filtered[player] = score_tup
    return filtered

if __name__=='__main__':
    # CLI entry point (Python 2 — note the `print` statement below).
    parser = argparse.ArgumentParser()
    parser.add_argument('-w', '--weighted', action='store_const', const=True)  # run the weighted-scores report
    parser.add_argument('-d', '--deltas', action='store_const', const=True)  # run the score-deltas report
    parser.add_argument('-l', '--limit', type=int)  # cap on how many contests to use
    parser.add_argument('-s', '--salaries', type=str)  # salary source, passed through to the reports
    parser.add_argument('-v', '--verbose', action='store_const', const=True)
    args = parser.parse_args()

    ids = get_ids(args.limit) if args.limit else get_ids()
    print 'Computing results with contests: %s' % ', '.join(ids)

    if args.weighted:
        if args.limit:
            # Only the most recent `limit` contests.
            run_weighted(ids[-args.limit:], args.salaries, args.verbose)
        else:
            run_weighted(ids, args.salaries, args.verbose)
    if args.deltas:
        # Two overlapping windows of recent contests, offset by two
        # (presumably "now" vs. "two contests ago" — see run_deltas).
        limit = args.limit if args.limit else 7
        run_deltas(ids[-limit+2:], ids[-limit:-2], args.salaries, args.verbose)
Ejemplo n.º 19
0
def train_net(net,
              epochs=20,
              batch_size=1,
              lr=0.1,
              lrd=0.99,
              val_percent=0.05,
              save_cp=True,
              gpu=True,
              img_scale=0.5,
              imagepath='',
              maskpath='',
              cpsavepath=''):
    """Train a segmentation net with SGD, class-weighted BCE loss and a
    per-epoch multiplicative learning-rate decay, mirroring every progress
    line into ``losslog.txt`` inside the checkpoint folder.

    :param net: network to train; its raw output is fed straight to the loss
    :param epochs: number of epochs
    :param batch_size: samples per gradient step
    :param lr: initial SGD learning rate
    :param lrd: multiplicative LR decay applied at the start of each epoch
    :param val_percent: fraction of ids held out for validation
    :param save_cp: save a checkpoint after every epoch
    :param gpu: move tensors to CUDA when True
    :param img_scale: image down-scaling factor used when loading
    :param imagepath: folder with input images
    :param maskpath: folder with ground-truth masks
    :param cpsavepath: folder for checkpoints and the loss log
    """

    dir_img = imagepath
    dir_mask = maskpath
    dir_checkpoint = cpsavepath
    # Per-class weights handed to the weighted BCE loss below.
    classweight = [1, 2, 3, 2]

    ids = get_ids(dir_img)
    ids = split_ids(ids)

    iddataset = split_train_val(ids, val_percent)

    # Every progress message is appended here as well as printed.
    logname = cpsavepath + '/' + 'losslog.txt'

    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
        CUDA: {}
    '''.format(epochs, batch_size, lr, len(iddataset['train']),
               len(iddataset['val']), str(save_cp), str(gpu)))

    N_train = len(iddataset['train'])

    optimizer = optim.SGD(net.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0005)

    # classweight = [1,4,8,4]
    criterion = BCELoss_weight(classweight)

    for epoch in range(epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        with open(logname, "a") as f:
            f.write('Starting epoch {}/{}.'.format(epoch + 1, epochs) + "\n")
        net.train()

        # reset the generators
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask,
                                   img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask,
                                 img_scale)

        epoch_loss = 0

        # Decay the learning rate once per epoch and push the new value into
        # every parameter group of the optimizer.
        lr = lr * lrd
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('lr', lr)
        for i, b in enumerate(batch(train, batch_size)):
            imgs = np.array([i[0] for i in b]).astype(np.float32)
            true_masks = np.array([i[1] for i in b])

            # Move the channel axis from last to second (NHWC -> NCHW).
            true_masks = np.transpose(true_masks, axes=[0, 3, 1, 2])
            imgs = torch.from_numpy(imgs)
            true_masks = torch.from_numpy(true_masks)

            if gpu:
                imgs = imgs.cuda()
                true_masks = true_masks.cuda()

            masks_pred = net(imgs)
            # print('masks_pred.shape',masks_pred.shape)
            # print('true_masks.shape', true_masks.shape)
            # Unlike the flat-BCE variants, the loss here operates on the
            # unflattened tensors; the *_flat names are historical.
            masks_probs_flat = masks_pred

            true_masks_flat = true_masks
            loss = criterion(masks_probs_flat, true_masks_flat)
            epoch_loss += loss.item()

            printinfo = '{0:.4f} --- loss: {1:.6f}'.format(
                i * batch_size / N_train, loss.item())
            print(printinfo)

            with open(logname, "a") as f:
                f.write(printinfo + "\n")

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # NOTE(review): divides by the last batch index i rather than the
        # batch count (i + 1) — the reported mean loss is slightly high.
        print('Epoch finished ! Loss: {}'.format(epoch_loss / i))
        with open(logname, "a") as f:
            f.write('Epoch finished ! Loss: {}'.format(epoch_loss / i) + "\n")
        if 1:
            # Validate with the Dice coefficient after every epoch.
            val_dice = eval_net(net, val)
            print('Validation Dice Coeff: {}'.format(val_dice))
            with open(logname, "a") as f:
                f.write('Validation Dice Coeff: {}'.format(val_dice) + "\n")

        if save_cp:
            torch.save(net.state_dict(),
                       dir_checkpoint + 'CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
            with open(logname, "a") as f:
                f.write('Checkpoint {} saved !'.format(epoch + 1) + "\n")
Ejemplo n.º 20
0
def train_net(net,
              epochs=5,
              batch_size=1,
              lr=0.1,
              val_percent=0.05,
              save_cp=True,
              gpu=False,
              img_scale=0.5):
    """Train a binary-segmentation net with SGD and BCE loss on the cropped
    ``../dataset/train`` images, checkpointing after every epoch.

    :param net: network; its output is flattened straight into BCELoss
    :param epochs: number of passes over the training set
    :param batch_size: samples per gradient step
    :param lr: SGD learning rate
    :param val_percent: fraction of ids held out for validation
    :param save_cp: save a checkpoint after every epoch
    :param gpu: move tensors to CUDA when True
    :param img_scale: image down-scaling factor applied when loading
    """
    dir_img = '../dataset/train/images/'
    dir_mask = '../dataset/train/masks/'
    dir_checkpoint = 'checkpoints/'

    # Image ids, each expanded into a left/right crop pair by split_ids.
    ids = get_ids(dir_img)
    ids = split_ids(ids)

    iddataset = split_train_val(ids, val_percent)

    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
        CUDA: {}
    '''.format(epochs, batch_size, lr, len(iddataset['train']),
               len(iddataset['val']), str(save_cp), str(gpu)))

    N_train = len(iddataset['train'])

    optimizer = optim.SGD(net.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0005)

    criterion = nn.BCELoss()

    for epoch in range(epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        net.train()

        # Re-create the data generators each epoch.
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale)

        epoch_loss = 0
        num_batches = 0

        # b is a batch of (image, mask) pairs. Images can have different
        # shapes between crops, so each field is gathered individually.
        for i, b in enumerate(batch(train, batch_size)):
            imgs = np.array([sample[0] for sample in b]).astype(np.float32)
            true_masks = np.array([sample[1] for sample in b])

            imgs = torch.from_numpy(imgs)
            true_masks = torch.from_numpy(true_masks)

            if gpu:
                imgs = imgs.cuda()
                true_masks = true_masks.cuda()

            # NOTE(review): BCELoss expects probabilities; assumes the net's
            # final layer applies a sigmoid — confirm.
            masks_pred = net(imgs)
            masks_probs_flat = masks_pred.view(-1)

            true_masks_flat = true_masks.view(-1)

            loss = criterion(masks_probs_flat, true_masks_flat)
            epoch_loss += loss.item()
            num_batches = i + 1

            print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # BUG FIX: the mean loss used to divide by the last batch index `i`
        # (one less than the batch count); divide by the actual batch count.
        print('Epoch finished ! Loss: {}'.format(
            epoch_loss / max(num_batches, 1)))

        if 1:
            # Dice-coefficient validation after every epoch.
            val_dice = eval_net(net, val, gpu)
            print('Validation Dice Coeff: {}'.format(val_dice))

        if save_cp:
            torch.save(net.state_dict(),
                       dir_checkpoint + 'cropped_CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
Ejemplo n.º 21
0
if __name__ == "__main__":
    args = parser.parse_args()
    seed = args.seed
    bs = args.bs
    lr = args.lr
    width = args.width
    depth = args.depth
    epochs = args.epochs
    verbose = args.verbose

    # set random seed
    set_random_seed(seed)
    # define the device for training
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # get training ids
    train_ids, valid_ids, test_ids = get_ids(65)
    # define dataloaders
    if args.dynamic:
        train_data = TrainDataset(train_ids)
        train_iter = DataLoader(train_data,
                                batch_size=bs,
                                num_workers=6,
                                sampler=LoopSampler)
    else:
        train_data = StaticTrainDataset(train_ids)
        train_iter = DataLoader(train_data,
                                batch_size=bs,
                                num_workers=6,
                                shuffle=True)

    train_tdata = TestDataset(train_ids)
Ejemplo n.º 22
0
def train_net(net,
              device,
              epochs=5,
              batch_size=1,
              lr=0.1,
              val_percent=0.15,
              save_cp=True,
              img_scale=0.5):
    """Train ``net`` with Adam, a per-batch adjusted learning rate, and
    random data augmentation in the second half of training.

    Relies on module-level ``dir_img``, ``dir_mask`` and ``dir_checkpoint``.

    :param net: network; ``net.n_classes`` selects the loss function
    :param device: torch device inputs are moved to
    :param epochs: number of epochs to train
    :param batch_size: samples per gradient step
    :param lr: initial Adam learning rate
    :param val_percent: fraction of ids held out for validation
    :param save_cp: save a checkpoint after each epoch
    :param img_scale: image down-scaling factor used when loading
    """
    ids = get_ids(dir_img)

    iddataset = split_train_val(ids, val_percent)

    n_train = len(iddataset['train'])
    n_val = len(iddataset['val'])
    optimizer = optim.Adam(net.parameters(), lr=lr)
    # Multi-class output uses cross entropy; single-class output uses BCE
    # on logits.
    if net.n_classes > 1:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.BCEWithLogitsLoss()

    for epoch in range(epochs):
        net.train()

        # reset the generators
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask,
                                   img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask,
                                 img_scale)

        epoch_loss = 0
        with tqdm(total=n_train,
                  desc='Epoch {0}/{1}'.format(epoch + 1, epochs),
                  unit='img') as pbar:
            for i, b in enumerate(batch(train, batch_size)):
                # Per-batch LR schedule driven by overall progress
                # (pbar.n counts images already processed this epoch).
                current_lr = adjust_learning_rate(optimizer, epoch, epochs,
                                                  pbar.n, n_train)
                random_rate = 0
                # From the halfway point on, augment batches with a rate
                # that grows with the epoch number.
                if epoch > epochs / 2:
                    random_rate = (epoch * 0.1) / epochs
                    b = custom_transforms.random_data_augmentation(
                        b, random_rate=random_rate)

                # Each sample is (image, (mask, ...)); only the first mask
                # entry is used as the target.
                imgs = np.array([i[0] for i in b]).astype(np.float32)
                true_masks = np.array([i[1][0] for i in b])

                imgs = torch.from_numpy(imgs)
                true_masks = torch.from_numpy(true_masks)

                imgs = imgs.to(device=device)
                true_masks = true_masks.to(device=device)

                masks_pred = net(imgs)
                loss = criterion(masks_pred, true_masks.long())
                epoch_loss += loss.item()

                # Show LR, augmentation rate and current batch loss in the
                # progress bar postfix.
                pbar.set_postfix(
                    **{
                        'lr:{0}, random_rate:{1}, loss:'.format(
                            current_lr, random_rate):
                        loss.item()
                    })

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                pbar.update(batch_size)

        if save_cp:
            # Create the checkpoint directory on first use; ignore
            # "already exists".
            try:
                os.mkdir(dir_checkpoint)
                logging.info('Created checkpoint directory')
            except OSError:
                pass
            torch.save(net.state_dict(),
                       dir_checkpoint + 'CP_epoch{0}.pth'.format(epoch + 1))
            logging.info('Checkpoint {0} saved !'.format(epoch + 1))

        val_score = eval_net(net, val, device, n_val)
        if net.n_classes > 1:
            logging.info('Validation cross entropy: {0}'.format(val_score))
        else:
            logging.info('Validation Dice Coeff: {0}'.format(val_score))
Ejemplo n.º 23
0
                      action='store_true',
                      default=True,
                      help='whether to save checkpoint')

    (options, args) = parser.parse_args()
    return options


if __name__ == '__main__':
    # Script entry point: build a single-channel UNet, its optimizer,
    # scheduler and loss, optionally resuming from a checkpoint.
    args = get_args()
    # dir_img = '/home/zzh/数据/mid project/raw_data'
    # dir_mask = '/home/zzh/数据/mid project/groundtruth'

    dir_img = '/home/zhuzhu/Desktop/mid project/raw_data'
    dir_mask = '/home/zhuzhu/Desktop/mid project/groundtruth'
    ids = get_ids(dir_img)  # generator of ids: 1, 2, 3, ...

    iddataset = split_train_val(
        ids, args.val_percent)  # {'train':[23,98,59,...],'val':[12,37,48,...]}

    net = UNet(n_channels=1, n_classes=args.num_classes)
    optimizer = torch.optim.SGD(net.parameters(),
                                lr=args.lr,
                                momentum=0.99,
                                weight_decay=5e-3)
    # Decay LR by 10x every 30 scheduler steps.
    scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
    criterion = nn.BCEWithLogitsLoss()
    if args.load:
        print('load model from checkpoint')
        net.load_state_dict(torch.load('checkpoint/unet.pth'))
Ejemplo n.º 24
0
def train_net(net,
              device,
              epochs=5,
              batch_size=1,
              lr=0.1,
              val_percent=0.15,
              save_cp=True,
              img_scale=0.5):
    """Train ``net`` with Adam, tracking a per-epoch F1 score, and validate
    after every epoch.

    Relies on module-level ``dir_img``, ``dir_mask`` and ``dir_checkpoint``.

    :param net: network; ``net.n_classes`` picks the loss (CE vs. BCE-logits)
    :param device: torch device tensors are moved to
    :param epochs: number of epochs
    :param batch_size: samples per gradient step
    :param lr: Adam learning rate
    :param val_percent: fraction of ids held out for validation
    :param save_cp: save a checkpoint after every epoch
    :param img_scale: image down-scaling factor used when loading
    """
    ids = get_ids(dir_img)

    iddataset = split_train_val(ids, val_percent)

    # BUG FIX: this banner, the tqdm desc, and the checkpoint filename /
    # messages below contained {...} placeholders but lacked the f-prefix,
    # so the literal braces were printed and every epoch's checkpoint was
    # written to the same 'CP_epoch{epoch + 1}.pth' file. All are f-strings
    # now.
    logging.info(f'''Starting training:
        Epochs:          {epochs}
        Batch size:      {batch_size}
        Learning rate:   {lr}
        Training size:   {len(iddataset["train"])}
        Validation size: {len(iddataset["val"])}
        Checkpoints:     {save_cp}
        Device:          {device.type}
        Images scaling:  {img_scale}
    ''')

    n_train = len(iddataset['train'])
    n_val = len(iddataset['val'])
    optimizer = optim.Adam(net.parameters(), lr=lr)
    # Multi-class nets use cross entropy; single-class nets use BCE on logits.
    if net.n_classes > 1:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.BCEWithLogitsLoss()

    for epoch in range(epochs):
        net.train()

        # Re-create the data generators each epoch.
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask,
                                   img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask,
                                 img_scale)

        epoch_loss = 0
        # Running F1 sum and batch count for this epoch's mean F1.
        f1_score = 0
        num = 0
        with tqdm(total=n_train, desc=f'Epoch {epoch + 1}/{epochs}',
                  unit='img') as pbar:
            for i, b in enumerate(batch(train, batch_size)):
                imgs = np.array([sample[0] for sample in b]).astype(np.float32)
                true_masks = np.array([sample[1] for sample in b])

                imgs = torch.from_numpy(imgs)
                true_masks = torch.from_numpy(true_masks)

                imgs = imgs.to(device=device)
                true_masks = true_masks.to(device=device)

                masks_pred = net(imgs)
                loss = criterion(masks_pred, true_masks)
                masks_pred_np = masks_pred.detach().cpu().numpy()
                true_masks_np = true_masks.detach().cpu().numpy()
                epoch_loss += loss.item()

                # F1 of the first sample's first channel, thresholded at 0.5.
                # NOTE(review): assumes masks_pred holds probabilities in
                # [0, 1]; with BCEWithLogitsLoss the outputs are raw logits —
                # confirm whether a sigmoid is missing here.
                pre_2D = np.array(masks_pred_np[0][0])
                true_2D = np.array(true_masks_np[0][0])
                # pre_2D_threshold aliases pre_2D, so the in-place
                # thresholding also mutates pre_2D (harmless — pre_2D is
                # not reused afterwards).
                pre_2D_threshold = pre_2D
                pre_2D_threshold[pre_2D_threshold > 0.5] = 1
                pre_2D_threshold[pre_2D_threshold <= 0.5] = 0
                f1_score += metrics.f1_score(true_2D.flatten(),
                                             pre_2D_threshold.flatten())
                num = num + 1

                pbar.set_postfix(**{'loss (batch)': loss.item()})

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                pbar.update(batch_size)

        if save_cp:
            # Create the checkpoint directory on first use; ignore
            # "already exists".
            try:
                os.mkdir(dir_checkpoint)
                logging.info('Created checkpoint directory')
            except OSError:
                pass
            torch.save(net.state_dict(),
                       dir_checkpoint + f'CP_epoch{epoch + 1}.pth')
            logging.info(f'Checkpoint {epoch + 1} saved !')

        val_score = eval_net(net, val, device, n_val)
        # Mean F1 over this epoch's batches.
        f1_score /= num
        print("f1-score:", f1_score, '\n')
        if net.n_classes > 1:
            logging.info('Validation cross entropy: {}'.format(val_score))

        else:
            logging.info('Validation Dice Coeff: {}'.format(val_score))
Ejemplo n.º 25
0
 pre_list_all = []
 re_list_all = []
 f1_list_all = []
 filepath = './bugreports_sds/'
 step = 6
 bc = BertClient()
 sentences = []
 vectors = []
 for i in range(36):
     report_sent = []
     with open(filepath + str(i + 1) + '.txt', "r", encoding='utf-8') as f:
         for line in f.readlines():
             report_sent.append(line.strip('\n'))
     sentences.append(report_sent)
 labels_ids = read_label('./data/goldset_sds.txt')
 ids = get_ids()
 labels = []
 for index, id_list in enumerate(ids):
     label = []
     for id in id_list:
         if id in labels_ids[index]:
             label.append(1)
         else:
             label.append(0)
     labels.append(label)
 sentences, labels = clear_data(sentences, labels)
 for i in range(0, 36, step):
     # model = create_classify_dense(EMBEDDING_DIM)
     model = create_classify_lstm_att(EMBEDDING_DIM, HIDDEN_SIZE,
                                      ATTENTION_SIZE)
     # model = create_classify_textcnn(EMBEDDING_DIM)
Ejemplo n.º 26
0
def train_net(net,
              epochs=30,
              batch_size=6,
              lr=0.1,
              val_percent=0.05,
              save_cp=True,
              gpu=False,
              img_scale=0.5):
    """Train a binary segmentation network with SGD and BCE loss.

    Per batch this also reports a smoothed soft IoU and an "xor" error
    ((union - intersect) / |ground truth|).  After each epoch the
    validation Dice coefficient is printed and, if ``save_cp``, the
    model state is checkpointed under ``dir_checkpoint``.

    Args:
        net: model whose forward(imgs) returns mask probabilities in [0, 1].
        epochs: number of passes over the training split.
        batch_size: images per optimizer step.
        lr: SGD learning rate.
        val_percent: fraction of ids held out for validation.
        save_cp: save a checkpoint after every epoch when True.
        gpu: move batches to CUDA when True.
        img_scale: rescale factor passed to the data loader.
    """
    dir_img = 'data/train_db/'
    dir_mask = 'data/GT_bw/'
    dir_checkpoint = 'checkpoint0919/'

    ids = get_ids(dir_img)
    ids = split_ids(ids)

    iddataset = split_train_val(ids, val_percent)

    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
        CUDA: {}
    '''.format(epochs, batch_size, lr, len(iddataset['train']),
               len(iddataset['val']), str(save_cp), str(gpu)))

    N_train = len(iddataset['train'])

    optimizer = optim.SGD(net.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0005)

    criterion = nn.BCELoss()

    for epoch in range(epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        net.train()

        # The loaders are one-shot generators: regenerate every epoch.
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask, img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask, img_scale)

        epoch_loss = 0
        epoch_iou = 0
        epoch_xor = 0
        # BUGFIX: the original averaged by the last enumerate index `i`,
        # which is off by one and raises ZeroDivisionError with one batch.
        n_batches = 0

        for i, b in enumerate(batch(train, batch_size)):
            imgs = np.array([sample[0] for sample in b]).astype(np.float32)
            true_masks = np.array([sample[1] for sample in b])

            imgs = torch.from_numpy(imgs)
            true_masks = torch.from_numpy(true_masks)

            if gpu:
                imgs = imgs.cuda()
                true_masks = true_masks.cuda()

            masks_pred = net(imgs)
            masks_probs_flat = masks_pred.view(-1)
            true_masks_flat = true_masks.view(-1)

            loss = criterion(masks_probs_flat, true_masks_flat)
            epoch_loss += loss.item()
            n_batches = i + 1

            print('step:', i)
            print('Validation Dice Coeff: {0:.4f} --- loss: {1:.6f}'.format(
                i * batch_size / N_train, loss.item()))

            # Soft IoU on the raw probabilities (no thresholding).
            smooth = 1e-6  # avoids 0/0 for empty masks
            intersect = (masks_probs_flat * true_masks_flat).sum()
            union = (masks_probs_flat + true_masks_flat).sum() - intersect
            iou = (intersect + smooth) / (union + smooth)
            epoch_iou += iou

            # xor error: (union - intersect) / |ground truth|.
            xor = (union - intersect) / true_masks_flat.sum()
            epoch_xor += xor

            print('mean IoU: {:.4f}'.format(iou))
            print('mean xor: {:.4f}'.format(xor))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        n_batches = max(n_batches, 1)
        print('Epoch finished ! epoch_Loss: {:.6f}'.format(epoch_loss / n_batches))
        print('epoch_iou: {:.4f}'.format(epoch_iou / n_batches))
        print('epoch_xor: {:.4f}'.format(epoch_xor / n_batches))

        val_dice = eval_net(net, val, gpu)
        print('epoch_Validation Dice Coeff: {:.4f}'.format(val_dice))
        # TODO(review): also report a mean IoU over the validation split.

        if save_cp:
            torch.save(net.state_dict(),
                       dir_checkpoint + 'CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
Ejemplo n.º 27
0
def preprocess(file, BATCH_SIZE, max_length, tokenizer):
    """Build BERT pre-training batches (MLM + next-sentence) from a text file.

    The file is split into sentences on [;:.!?]; pairs of sentences form
    next-sentence-prediction examples (50% true next, 50% random), and
    15% of non-special tokens are selected for masked-LM (80% -> [MASK]
    id 103, 10% -> random token, 10% kept).

    Args:
        file: path to the raw text corpus.
        BATCH_SIZE: examples per batch.
        max_length: padded sequence length.
        tokenizer: BERT tokenizer providing .vocab and .tokenize().

    Returns:
        List of [input_ids, segments, padding_masks, is_next, is_masked]
        numpy-array batches.
    """
    train_dataset = []
    input_vocab_size = len(tokenizer.vocab)

    # BUGFIX: the original opened the file without ever closing it.
    with open(file, 'r') as f:
        words = f.read()

    words = words.replace('\n\n', '.')
    words = words.replace('\n', ' ')
    words = re.split('[;:.!?]', words)

    i = 0
    for _ in range(len(words) // BATCH_SIZE + 1):
        if i + 1 >= len(words):
            break
        input_ids_list = []
        segment_list = []
        is_masked_list = []
        is_next_list = []

        for j in range(BATCH_SIZE):
            if i + 1 >= len(words):
                break

            # Decide whether the 2nd sentence is the true next sentence.
            now = int(random.random() > 0.5)

            if now == 1:
                res = ["[CLS]"] + tokenizer.tokenize(words[i]) + [
                    "[SEP]"
                ] + tokenizer.tokenize(words[i + 1]) + ["[SEP]"]
            else:
                # Negative example: pair with a random sentence.
                res = ["[CLS]"] + tokenizer.tokenize(
                    words[i]) + ["[SEP]"] + tokenizer.tokenize(
                        words[random.randint(0,
                                             len(words) - 1)]) + ["[SEP]"]

            input_ids = get_ids(res, tokenizer, max_length)
            segment_list.append(get_segments(res, max_length))
            is_next_list.append(now)
            is_masked = [0] * max_length

            for ind in range(max_length):
                if input_ids[ind] == 0:  # padding reached: stop masking
                    break
                if input_ids[ind] == 101 or input_ids[
                        ind] == 102:  # never mask [CLS] / [SEP]
                    continue
                if random.random() < 0.15:  # select 15% of tokens
                    is_masked[ind] = input_ids[ind]
                    if random.random() < 0.8:  # of those, 80% -> [MASK]
                        input_ids[ind] = 103
                    elif random.random() < 0.5:  # 10% -> random token
                        # BUGFIX: randint is inclusive on both ends, so the
                        # original could emit id == vocab size (out of range).
                        input_ids[ind] = random.randint(
                            1000, input_vocab_size - 1)
                    # remaining 10%: keep the original token
            input_ids_list.append(input_ids)
            is_masked_list.append(is_masked)
            if now == 1:
                i += 2
            else:
                i += 1

        input_ids_list = np.array(input_ids_list)
        is_masked_list = np.array(is_masked_list)
        masks = create_padding_mask(input_ids_list)
        segment_list = np.array(segment_list)
        is_next_list = np.array(is_next_list)
        is_next_list = np.reshape(is_next_list, (len(is_next_list), 1))
        train_dataset.append([
            input_ids_list, segment_list, masks, is_next_list, is_masked_list
        ])

    return train_dataset
def train_net(
        net,
        epochs=5,
        batch_size=1,
        lr=0.1,
        #val_percent=0.1,
        save_cp=True,
        gpu=False,
        img_scale=0.5):
    """Train a vessel-segmentation network on the DRIVE dataset.

    Uses Adam + BCE over fixed train/val directories, saves a checkpoint
    every epoch and steps a ReduceLROnPlateau scheduler on the
    validation Dice score.  The signature (names, defaults) is unchanged.
    """
    img_train = '/home/lixiaoxing/github/Pytorch-UNet/data/DRIVE/AV_groundTruth/training/images_jpg/'
    mask_train = '/home/lixiaoxing/github/Pytorch-UNet/data/DRIVE/AV_groundTruth/training/vessel/'
    img_val = '/home/lixiaoxing/github/Pytorch-UNet/data/DRIVE/AV_groundTruth/training/val_jpg/'
    mask_val = '/home/lixiaoxing/github/Pytorch-UNet/data/DRIVE/AV_groundTruth/training/val_vessel/'
    dir_checkpoint = 'checkpoints_drive3_adam/'
    if not os.path.exists(dir_checkpoint):
        os.makedirs(dir_checkpoint)

    ids_train = get_ids(img_train)
    data_train = list(split_ids(ids_train))
    ids_val = get_ids(img_val)
    data_val = list(split_ids(ids_val))

    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
        CUDA: {}
    '''.format(epochs, batch_size, lr, len(data_train), len(data_val),
               str(save_cp), str(gpu)))

    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=1e-5)

    # NOTE(review): Dice is a higher-is-better metric, so mode='min' for
    # scheduler.step(val_dice) below looks inverted — confirm intent.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.1,
                                                           mode='min',
                                                           patience=3,
                                                           verbose=True)
    criterion = nn.BCELoss()

    for epoch in range(epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        net.train()

        # The loaders are one-shot generators: regenerate every epoch.
        train = get_imgs_and_masks_y(data_train, img_train, mask_train,
                                     img_scale)
        val = get_imgs_and_masks_y(data_val, img_val, mask_val, img_scale)

        epoch_loss = 0
        # BUGFIX: the original divided by the last enumerate index `i`
        # (off by one; ZeroDivisionError with a single batch).
        n_batches = 0

        for i, b in enumerate(batch(train, batch_size)):
            imgs = np.array([sample[0] for sample in b]).astype(np.float32)
            true_masks = np.array([sample[1] for sample in b])

            imgs = torch.from_numpy(imgs)
            true_masks = torch.from_numpy(true_masks)

            if gpu:
                imgs = imgs.cuda()
                true_masks = true_masks.cuda()

            masks_pred = net(imgs)
            masks_probs_flat = masks_pred.view(-1)
            true_masks_flat = true_masks.view(-1)

            loss = criterion(masks_probs_flat, true_masks_flat)
            epoch_loss += loss.item()
            n_batches = i + 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print('Epoch finished ! Loss: {}'.format(
            epoch_loss / max(n_batches, 1)))

        val_dice = eval_net(net, val, gpu)
        print('Validation Dice Coeff: {}'.format(val_dice))

        if save_cp:
            torch.save(net.state_dict(),
                       dir_checkpoint + 'CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))

        scheduler.step(val_dice)
Ejemplo n.º 29
0
# # plt.plot(hist_img, 'k-')
# plt.plot(hist_coal, 'r-')
# plt.plot(hist_gangue, 'g-')
# plt.xlim([0, 256])

# plt.show()

# Evaluation script fragment: load a trained UNet(1, 2) checkpoint on
# CPU and iterate over a held-out validation split.
from sklearn.metrics import precision_recall_curve, roc_curve, classification_report
from utils import get_imgs_and_masks, get_ids, split_train_val
from unet import UNet
import torch

ori_w, ori_h = 852, 480  # original image size (width, height) — usage not visible here; TODO confirm
dir_img = '/home/zhuzhu/Desktop/mid project/raw_data'
dir_mask = '/home/zhuzhu/Desktop/mid project/groundtruth'
ids = get_ids(dir_img)
iddataset = split_train_val(ids, 0.05)  # 5% of ids held out as validation

net = UNet(1, 2)
net.eval()  # inference mode before loading weights
net.load_state_dict(
    torch.load(
        '/media/zhuzhu/0C5809B80C5809B8/draft/unet/checkpoint/unet_0.854608765.pth',
        map_location='cpu'))
val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask)

c = 0
# Iterate validation (image, mask) pairs.
# NOTE(review): the loop body is truncated in this source dump.
for i, b in enumerate(val):
    img = np.array(b[0]).astype(np.float32)
    mask = np.array(b[1]).astype(np.float32)
Ejemplo n.º 30
0
def train_net(args, net, val_percent=0.05, save_cp=True):
    """Train `net` with SGD + BCE on data under ``args.dataset_folder``.

    Expects ``args`` to provide: dataset_folder, epochs, batch_size, lr,
    img_scale.  Batches are moved to CUDA unconditionally (as in the
    original), so a GPU is required.

    Args:
        args: namespace of hyper-parameters and paths (see above).
        net: model whose forward(imgs) returns mask probabilities.
        val_percent: fraction of ids held out for validation.
        save_cp: save a checkpoint after every epoch when True.
    """
    dir_img = os.path.join(args.dataset_folder, 'data/train/')
    dir_mask = os.path.join(args.dataset_folder, 'data/train_masks/')
    dir_checkpoint = os.path.join(args.dataset_folder, 'checkpoints/')
    if not os.path.exists(dir_checkpoint):
        os.makedirs(dir_checkpoint)

    ids = get_ids(dir_img)
    ids = split_ids(ids)

    iddataset = split_train_val(ids, val_percent)

    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
    '''.format(args.epochs, args.batch_size, args.lr, len(iddataset['train']),
               len(iddataset['val']), str(save_cp)))

    N_train = len(iddataset['train'])

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=0.0005)

    criterion = nn.BCELoss()

    for epoch in range(args.epochs):
        # BUGFIX: the original printed `args.epochs + 1` instead of the
        # current epoch number.
        print('Starting epoch {}/{}.'.format(epoch + 1, args.epochs))
        net.train()

        # The loaders are one-shot generators: regenerate every epoch.
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask,
                                   args.img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask,
                                 args.img_scale)

        epoch_loss = 0
        # BUGFIX: mean loss was divided by the last index `i` (off by one;
        # ZeroDivisionError with a single batch).
        n_batches = 0

        for i, b in enumerate(batch(train, args.batch_size)):
            imgs = np.array([sample[0] for sample in b]).astype(np.float32)
            true_masks = np.array([sample[1] for sample in b])

            imgs = torch.from_numpy(imgs).cuda()
            true_masks = torch.from_numpy(true_masks).cuda()

            masks_pred = net(imgs)
            masks_probs_flat = masks_pred.view(-1)
            true_masks_flat = true_masks.view(-1)

            loss = criterion(masks_probs_flat, true_masks_flat)
            epoch_loss += loss.item()
            n_batches = i + 1

            print('{0:.4f} --- loss: {1:.6f}'.format(
                i * args.batch_size / N_train, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print('Epoch finished ! Loss: {}'.format(
            epoch_loss / max(n_batches, 1)))

        val_dice = eval_net(net, val)
        print('Validation Dice Coeff: {}'.format(val_dice))

        if save_cp:
            torch.save(net.state_dict(),
                       dir_checkpoint + 'CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
Ejemplo n.º 31
0
def train_net(net,
              device,
              epochs=5,
              batch_size=1,
              lr=0.1,
              val_percent=0.15,
              save_cp=True,
              img_scale=0.5):
    """Train `net` with Adam + BCE loss.

    Relies on module-level ``dir_img``, ``dir_mask`` and
    ``dir_checkpoint`` paths.  When ``save_cp`` is True the weights are
    written to '<dir_checkpoint>gpu_3.pth' after every epoch; the
    validation Dice coefficient is logged at the end of each epoch.
    """
    ids = get_ids(dir_img)

    iddataset = split_train_val(ids, val_percent)

    # BUGFIX: the original string lacked the f-prefix, so the {...}
    # placeholders were logged literally instead of being interpolated.
    logging.info(f'''Starting training:
        Epochs:          {epochs}
        Batch size:      {batch_size}
        Learning rate:   {lr}
        Training size:   {len(iddataset["train"])}
        Validation size: {len(iddataset["val"])}
        Checkpoints:     {save_cp}
        Device:          {device.type}
        Images scaling:  {img_scale}
    ''')

    n_train = len(iddataset['train'])
    n_val = len(iddataset['val'])
    optimizer = optim.Adam(net.parameters(), lr=lr)
    criterion = nn.BCELoss()

    for epoch in range(epochs):
        net.train()

        # The loaders are one-shot generators: regenerate every epoch.
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask,
                                   img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask,
                                 img_scale)

        epoch_loss = 0
        # BUGFIX: the tqdm description also lacked the f-prefix.
        with tqdm(total=n_train, desc=f'Epoch {epoch + 1}/{epochs}',
                  unit='img') as pbar:
            for i, b in enumerate(batch(train, batch_size)):
                imgs = np.array([sample[0] for sample in b]).astype(np.float32)
                true_masks = np.array([sample[1] for sample in b])

                imgs = torch.from_numpy(imgs).to(device=device)
                true_masks = torch.from_numpy(true_masks).to(device=device)

                masks_pred = net(imgs)
                loss = criterion(masks_pred, true_masks)
                epoch_loss += loss.item()

                pbar.set_postfix(**{'loss (batch)': loss.item()})

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                pbar.update(batch_size)

        if save_cp:
            try:
                os.mkdir(dir_checkpoint)
                logging.info('Created checkpoint directory')
            except OSError:
                pass  # directory already exists
            torch.save(net.state_dict(), dir_checkpoint + 'gpu_3.pth')
            logging.info('gpu_3 saved !')

        val_dice = eval_net(net, val, device, n_val)
        logging.info('Validation Dice Coeff: {}'.format(val_dice))
Ejemplo n.º 32
0
def submission(model,
               sampling_method,
               data_dir,
               results_dir,
               device='cpu',
               verbose=True):
    """Train a boosted-tree classifier and write a Kaggle-style submission.

    Args:
        model: 'lgbm' trains LightGBM; any other value trains XGBoost.
        sampling_method: 'adasyn', 'smote', 'random' or 'smoteenn'
            selects an oversampler; any other value skips oversampling.
        data_dir: directory read by the get_train/get_test/get_ids helpers.
        results_dir: directory where submission_<model>.csv is written.
        device: 'cpu' or a GPU identifier (selects booster parameters).
        verbose: print progress information when True.

    Side effect: writes the top-5 predicted countries per test id to
    ``results_dir/submission_<model>.csv``.
    """
    if verbose:
        print("Using device: {}".format(device))
        print("Reading train data in...")
    # LightGBM consumes raw categorical codes; other models get one-hot.
    if model == 'lgbm':
        X_train, Y_train, feature_labels = get_train(data_dir, one_hot=False)
    else:
        X_train, Y_train, feature_labels = get_train(data_dir)

    X_test = get_test(data_dir)
    train_ids, test_ids = get_ids(data_dir)
    country_names = get_country_names(data_dir)

    if verbose:
        print("Successfully loaded data")

    lgbm_params = {
        'task': 'train',
        'objective': 'multiclass',
        'num_class': 12,
        'num_leaves': 31,
        'learning_rate': 0.3,
        'lambda_l2': 1.0,
        'feature_fraction': 0.9,
        'min_child_weight': 1.0,
        'device': device,
        'gpu_device_id': 0,
        'gpu_platform_id': 0,
        'max_bin': 63,
        'verbose': 0
    }

    if device == 'cpu':
        xgb_params = {
            "objective": "multi:softprob",
            "num_class": 12,
            "tree_method": "hist",
            "colsample_bytree": 0.9,
            "n_jobs": 2,
            "silent": 1
        }
    else:
        xgb_params = {
            "objective": "multi:softprob",
            "num_class": 12,
            "tree_method": "gpu_hist",
            "colsample_bytree": 0.9,
            "gpu_id": 0,
            "max_bin": 16,
            "silent": 1
        }
    if verbose:
        print("{} sampling process started...".format(sampling_method))
    curr_time = time.time()

    # Oversample the minority classes before training (no-op by default).
    if sampling_method == "adasyn":
        X_train_resampled, Y_train_resampled = ADASYN().fit_sample(
            X_train, Y_train)
    elif sampling_method == "smote":
        X_train_resampled, Y_train_resampled = SMOTE().fit_sample(
            X_train, Y_train)
    elif sampling_method == "random":
        X_train_resampled, Y_train_resampled = RandomOverSampler().fit_sample(
            X_train, Y_train)
    elif sampling_method == "smoteenn":
        X_train_resampled, Y_train_resampled = SMOTEENN().fit_sample(
            X_train, Y_train)
    else:
        X_train_resampled, Y_train_resampled = X_train, Y_train

    if verbose:
        print("Oversampling completed")
        print("Time Taken: {:.2f}".format(time.time() - curr_time))
        print("Size of Oversampled data: {}".format(X_train_resampled.shape))
        print("{} selected for classification".format(model))

    curr_time = time.time()
    if model == 'lgbm':
        categorical_feature = [
            'age_bucket', 'gender', 'signup_method', 'signup_flow', 'language',
            'affiliate_channel', 'affiliate_provider',
            'first_affiliate_tracked', 'signup_app', 'first_device_type',
            'first_browser'
        ]
        lgb_train = lgb.Dataset(data=X_train_resampled,
                                label=Y_train_resampled,
                                feature_name=feature_labels,
                                categorical_feature=categorical_feature)
        clf = lgb.train(lgbm_params, lgb_train, num_boost_round=30)
        print("Time taken: {:.2f}".format(time.time() - curr_time))
        Y_probs = clf.predict(X_test)
    else:
        X_train_xgb = xgb.DMatrix(X_train_resampled,
                                  Y_train_resampled,
                                  feature_names=feature_labels)
        X_test_xgb = xgb.DMatrix(X_test, feature_names=feature_labels)
        clf = xgb.train(xgb_params, X_train_xgb, 30)
        print("Time taken: {:.2f}".format(time.time() - curr_time))
        Y_probs = clf.predict(X_test_xgb)

    # BUGFIX: the original ranked only the first 5 class columns
    # (argsort(-Y_probs[:, :5])); the top-5 must be selected after
    # ranking all 12 classes.  Also hoisted out of the if/else above.
    order = np.argsort(-Y_probs, axis=1)[:, :5]

    print("Generating submission csv...")
    with open(os.path.join(results_dir, 'submission_{}.csv'.format(model)),
              'w') as f:
        writer = csv.writer(f, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['id', 'country'])
        for i in range(len(test_ids)):
            for k in range(5):
                writer.writerow([test_ids[i], country_names[order[i, k]]])
    print("Finished.")
Ejemplo n.º 33
0
def train_net(net,
              epochs=5,
              batch_size=1,
              lr=0.1,
              val_percent=0.05,
              save_cp=True,
              gpu=False,
              img_scale=0.5):
    """Train a segmentation network on TGS salt data with SGD + BCE.

    Masks are transposed and divided by 65535 on load — presumably
    16-bit PNG masks normalised to [0, 1]; confirm against the dataset.
    A checkpoint is saved after every epoch when ``save_cp`` is True.
    The signature (names, defaults) is unchanged.
    """
    dir_img = 'E:/git/dataset/tgs-salt-identification-challenge/train/images/'
    dir_mask = 'E:/git/dataset/tgs-salt-identification-challenge/train/masks/'
    dir_checkpoint = 'checkpoints/'

    ids = get_ids(dir_img)
    ids = split_ids(ids)

    iddataset = split_train_val(ids, val_percent)

    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
        CUDA: {}
    '''.format(epochs, batch_size, lr, len(iddataset['train']),
               len(iddataset['val']), str(save_cp), str(gpu)))

    N_train = len(iddataset['train'])

    optimizer = optim.SGD(net.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0005)

    criterion = nn.BCELoss()

    for epoch in range(epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        net.train()

        # The loaders are one-shot generators: regenerate every epoch.
        train = get_imgs_and_masks(iddataset['train'], dir_img, dir_mask,
                                   img_scale)
        val = get_imgs_and_masks(iddataset['val'], dir_img, dir_mask,
                                 img_scale)

        epoch_loss = 0
        # BUGFIX: the original divided by the last enumerate index `i`
        # (off by one; ZeroDivisionError with a single batch).
        n_batches = 0

        for i, b in enumerate(batch(train, batch_size)):
            imgs = np.array([sample[0] for sample in b]).astype(np.float32)
            # Transpose and scale 16-bit masks into [0, 1].
            true_masks = np.array([sample[1].T / 65535 for sample in b])

            imgs = torch.from_numpy(imgs)
            true_masks = torch.from_numpy(true_masks)

            if gpu:
                imgs = imgs.cuda()
                true_masks = true_masks.cuda()

            masks_pred = net(imgs)
            masks_probs_flat = masks_pred.view(-1)
            true_masks_flat = true_masks.view(-1)

            loss = criterion(masks_probs_flat, true_masks_flat)
            epoch_loss += loss.item()
            n_batches = i + 1

            print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train,
                                                     loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print('Epoch finished ! Loss: {}'.format(
            epoch_loss / max(n_batches, 1)))

        val_dice = eval_net(net, val, gpu)
        print('Validation Dice Coeff: {}'.format(val_dice))

        if save_cp:
            torch.save(net.state_dict(),
                       dir_checkpoint + 'CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
Ejemplo n.º 34
0
def get_all_group_ids(amount):
    """Populate the module-level ``group_ids`` with up to *amount* ids.

    Delegates fetching to ``utils.get_ids`` using the SCIM group search
    endpoint; the result is stored globally rather than returned.
    """
    global group_ids
    fetched = utils.get_ids(scim.search_with_get_on_groups, amount)
    group_ids = fetched