Example #1
def train():
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=max_to_keep)
    utils.load_ckpt(sess, ckpt_dir, saver)
    best_loss = 1e6
    start_epoch = 0
    if history_file.exists():
        df = pd.read_csv(history_file)
        best_loss = df['best_loss'].min()
        start_epoch = int(df.iloc[-1]['epoch']) + 1

    print('Training ...')
    for epoch in range(start_epoch, num_epochs):
        start_time = time.time()
        train_loss, train_lr = train_one_epoch(sess, epoch, saver)
        val_loss, best_loss = val_one_epoch(sess, epoch, saver, best_loss)
        csv_header = ['epoch', 'lr', 'train_loss', 'val_loss', 'best_loss']
        csv_values = [epoch, train_lr, train_loss, val_loss, best_loss]
        utils.log_csv(history_file,
                      csv_values,
                      header=csv_header if epoch == 0 else None)
        print(
            f'[{opt.area}-{opt.mode}] Epoch {epoch} loss: {train_loss:.6f}, '
            f'val loss: {val_loss:.6f}, duration: {time.time() - start_time:.3f}s'
        )

    print('Training completed...')
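The TensorFlow 1.x examples on this page call a utils.load_ckpt(...) helper without showing it. Below is a minimal sketch of such a helper matching the utils.load_ckpt(sess, ckpt_dir, saver) call in Example #1 (other examples pass different argument orders); the original utils module is not shown here, so treat this as an illustration only.

import tensorflow as tf

def load_ckpt(sess, ckpt_dir, saver):
    # Restore the latest checkpoint from ckpt_dir into the session, if one exists.
    # Returns the restored checkpoint path, or None when no checkpoint is found.
    ckpt_state = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt_state and ckpt_state.model_checkpoint_path:
        saver.restore(sess, ckpt_state.model_checkpoint_path)
        return ckpt_state.model_checkpoint_path
    return None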
Example #2
def test(model,data_loader,cfg,test_args):
    if test_args['load_ckpt'] is not None:
        load_ckpt(test_args,model)
    model.eval()
    error_total = {'err_absRel': 0.0, 'err_squaRel': 0.0, 'err_rms': 0.0,
                         'err_silog': 0.0, 'err_logRms': 0.0, 'err_silog2': 0.0,
                         'err_delta1': 0.0, 'err_delta2': 0.0, 'err_delta3': 0.0,
                         'err_log10': 0.0, 'err_whdr': 0.0}
    n_pxl_total = 0
    eval_num_total = 0
    for i, data in enumerate(tqdm(data_loader)):
        output = model.inference(data)
        pred_depth = torch.squeeze(output['b_fake'])
        img_path = data['A_paths']
        invalid_side = data['invalid_side'][0]
        pred_depth = pred_depth[invalid_side[0]:pred_depth.size(0) - invalid_side[1], :]
        pred_depth = pred_depth / data['ratio'].to(cfg['device']) # scale the depth
        pred_depth = resize_image(pred_depth, torch.squeeze(data['B_raw']).shape)

        if i % 10 == 0:
            Img = vutils.make_grid(data['A'].data.cpu(),normalize = True,scale_each = True)
            GT_depth = vutils.make_grid(data['B_raw'].data.cpu(),normalize = True,scale_each = True)
            Estimated_depth = vutils.make_grid(torch.from_numpy(pred_depth),normalize = True,scale_each = True)
            Edge = vutils.make_grid(data['E'].unsqueeze(1).repeat(1,3,1,1).data.cpu(),normalize = True,scale_each = True)
            writer.add_image('RGB',Img,i)
            writer.add_image('GT_Depth',GT_depth,i)
            writer.add_image('Predicted_Depth',Estimated_depth,i)
            writer.add_image('Edge',Edge,i)

        error_batch,n_pxl,eval_num = evaluate_err(pred_depth, data['B_raw'], mask=(45, 471, 41, 601), scale=10.)
        # both dicts share the same keys, so accumulate by key
        for k in error_total:
            error_total[k] += error_batch[k]
        n_pxl_total = n_pxl_total + n_pxl
        eval_num_total = eval_num_total + eval_num
    error = calculate_average_error(error_total,n_pxl_total,eval_num_total)
    print('----------------------------------------------------------')
    print('absREL: %f'%error['err_absRel'])
    print('silog: %f'%np.sqrt(error['err_silog2'] - (error['err_silog'])**2))
    print('log10: %f'%error['err_log10'])
    print('RMS: %f'%error['err_rms'])
    print('delta1: %f'%error['err_delta1'])
    print('delta2: %f'%error['err_delta2'])
    print('delta3: %f'%error['err_delta3'])
    print('squaRel: %f'%error['err_squaRel'])
    print('logRms: %f'%error['err_logRms'])

    print('----------------------------------------------------------')
    del error,output,pred_depth,img_path,invalid_side
    model.train()
Example #3
def main():
    args = parser.parse_args()

    with open(args.config) as f:
        config = yaml.safe_load(f)

    for k, v in config['common'].items():
        setattr(args, k, v)

    if not (args.model_path and os.path.isfile(args.model_path)):
        print("=> no checkpoint found at '{}'".format(args.model_path))
        return

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = model_zoo[args.arch](num_classes=args.num_classes)
    model = torch.nn.DataParallel(model).cuda()

    load_ckpt(args.model_path, model)

    torch_in = torch.rand(1, 3, args.input_size, args.input_size)
    torch_in = torch_in.cuda()

    model = model.module
    model.eval()
    #print(model)
    print("Exporting onnx model to {}".format(args.onnx_path))
    # torch.onnx.export returns None, so there is nothing to assign
    torch.onnx.export(model,
                      torch_in,
                      args.onnx_path,
                      verbose=True,
                      opset_version=7)

    transforms = []
    transforms.append({'resize': args.image_size})
    transforms.append({'center_crop': args.input_size})
    transforms.append({'to_tensor': None})
    transforms.append({
        'normalize': {
            'mean': [0.485, 0.456, 0.406],
            'std': [0.229, 0.224, 0.225]
        }
    })
    config = {'transforms': transforms}

    onnx_config_path = args.onnx_path.replace('.onnx', '-cfg.json')
    print("Exporting onnx model config to {}".format(onnx_config_path))
    with open(onnx_config_path, 'w') as f:
        json.dump(config, f)
Example #4
    def __init__(self,
                 model,
                 batcher,
                 src_vocab,
                 tgt_vocab,
                 ckpt_id,
                 output_beams=False):
        self._model = model
        self._model()  # build the model graph
        self._batcher = batcher
        self._src_vocab = src_vocab
        self._tgt_vocab = tgt_vocab
        self.output_beams = output_beams
        self._saver = tf.train.Saver()
        self._sess = tf.Session(config=utils.get_config())

        ckpt_path = utils.load_ckpt(self._saver, self._sess, "train", ckpt_id)

        ckpt_name = "ckpt-" + ckpt_path.split('-')[-1]
        self._decode_dir = os.path.join(
            model.hps.model_path, get_decode_dir_name(ckpt_name, model.hps))
        if os.path.exists(self._decode_dir):
            raise Exception("single_pass decode directory %s should "
                            "not already exist" % self._decode_dir)
        os.mkdir(self._decode_dir)

        self._ref_dir = os.path.join(self._decode_dir, "reference")
        if not os.path.exists(self._ref_dir): os.mkdir(self._ref_dir)
        self._dec_dir = os.path.join(self._decode_dir, "decoded")
        if not os.path.exists(self._dec_dir): os.mkdir(self._dec_dir)
        self._summary_path = os.path.join(self._decode_dir, "summary.txt")
Example #5
def bitcoin(args):
        
    A, X = utils.load_XA(args.dataset, datadir = "../Generate_XA_Data/XAL")
    L = utils.load_labels(args.dataset, datadir = "../Generate_XA_Data/XAL")
    num_classes = max(L) + 1
    input_dim = X.shape[1]
    num_nodes = X.shape[0]
    ckpt = utils.load_ckpt(args)

    print("input dim: ", input_dim, "; num classes: ", num_classes)
    
    model = models.GcnEncoderNode(
            input_dim=input_dim,
            hidden_dim=args.hidden_dim,
            embedding_dim=args.output_dim,
            label_dim=num_classes,
            num_layers=args.num_gc_layers,
            bn=args.bn,
            args=args,
        )
    
    model.load_state_dict(ckpt["model_state"]) 
    pred = ckpt["save_data"]["pred"]
    
    explainer = pe.Node_Explainer(model, A, X, pred, args.num_gc_layers)
    
    node_to_explain = [i for [i] in np.argwhere(np.sum(A,axis = 0) > 2)]
    
    explanations = explainer.explain_range(node_to_explain, num_samples = args.num_perturb_samples, top_node = args.top_node)
    
    
    print(explanations)
    
    savename = utils.gen_filesave(args)
    np.save(savename,explanations)
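Examples #5, #8, #12 and #17 read ckpt["model_state"], ckpt["save_data"] and ckpt["epoch"] from utils.load_ckpt(args). A minimal sketch consistent with that usage follows; it assumes the checkpoint is a dictionary saved with torch.save, and the path construction (ckpt_dir, file name) is a hypothetical stand-in for whatever the original utils module does.

import os
import torch

def load_ckpt(args, ckpt_dir="ckpt"):
    # Load the checkpoint dict for the given dataset.
    # Keys used by these examples: 'model_state', 'save_data', 'epoch'.
    ckpt_path = os.path.join(ckpt_dir, args.dataset + ".pth.tar")  # hypothetical naming
    return torch.load(ckpt_path, map_location="cpu")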
Example #6
def main():
    global args
    args = parser.parse_args()

    assert args.output_path.endswith('.npy')

    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](feature_dim=args.feature_dim)
    model = IdentityMapping(model)

    model.cuda()
    model = torch.nn.DataParallel(model).cuda()

    if args.load_path:
        classifier_keys = ['module.logits.weight', 'module.logits.bias']
        load_ckpt(args.load_path, model, ignores=classifier_keys, strict=True)

    cudnn.benchmark = True

    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                     std=[0.25, 0.25, 0.25])

    test_loader = DataLoader(BinDataset(
        args.bin_file,
        transforms.Compose([
            transforms.Resize(args.input_size),
            transforms.ToTensor(),
            normalize,
        ])),
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.workers,
                             pin_memory=True)

    features = extract(test_loader, model)
    assert features.shape[1] == args.feature_dim

    print('saving extracted features to {}'.format(args.output_path))
    folder = os.path.dirname(args.output_path)
    if folder != '' and not os.path.exists(folder):
        os.makedirs(folder)
    np.save(args.output_path, features)
Example #7
def proc_func(infile, outfile, csv_path, csv_index):
    img_path = img_folder + infile.split('.')[0] + '/'
    video2frames(src_folder + infile, img_path)
    feature = OpticalFlowAnalyzer(img_path).analyze()
    np.savez(dst_folder + outfile, feature)

    [csv_old, index_old] = load_ckpt(ckpt_path).split('#')
    ckpt_index = str(
        max(csv_index, int(index_old)) if csv_path == csv_old else csv_index)
    ckpt_info = csv_path + '#' + ckpt_index
    save_ckpt(ckpt_info, ckpt_path)
Example #8
def evaluate_bitcoin_explanation(explanations, args):
    # Get predictions
    ckpt = utils.load_ckpt(prog_args)
    pred = ckpt["save_data"]["pred"]
    pred_label = [np.argmax(p) for p in pred[0]]

    # Get ground truth
    filename_pos = os.path.join(
        '../Generate_XA_Data/ground_truth_explanation/' + prog_args.dataset,
        prog_args.dataset + '_pos.csv')
    filename_neg = os.path.join(
        '../Generate_XA_Data/ground_truth_explanation/' + prog_args.dataset,
        prog_args.dataset + '_neg.csv')
    df_pos = pd.read_csv(filename_pos, header=None, index_col=0,
                         squeeze=True).to_dict()
    df_neg = pd.read_csv(filename_neg, header=None, index_col=0,
                         squeeze=True).to_dict()

    # Evaluate
    pred_pos = 0
    true_pos = 0
    for node in explanations:
        gt = []
        if pred_label[node] == 0:
            buff_str = df_neg[node].replace('[', '')
            buff_str = buff_str.replace(']', '')
            gt = [int(s) for s in buff_str.split(',')]
        else:
            buff_str = df_pos[node].replace('[', '')
            buff_str = buff_str.replace(']', '')
            gt = [int(s) for s in buff_str.split(',')]
        ex = explanations[node]

        for e in ex:
            pred_pos = pred_pos + 1
            if e in gt:
                true_pos = true_pos + 1

    precision = true_pos / pred_pos
    print("Explainer's precision is ", precision)

    savedir = 'result/'
    if args.top_node is None:
        top = "no_top"
    else:
        top = "top_" + str(args.top_node)
    report_file_name = 'report_' + args.dataset + ".txt"
    report_file = os.path.join(savedir, report_file_name)

    with open(report_file, "a") as text_file:
        text_file.write(args.dataset + ", " + str(args.num_perturb_samples) +
                        " samples, " + top + " | Precision: " +
                        str(precision) + "\n")
        text_file.write("\n")
Example #9
def run_eval(model, batcher, ckpt_id):
    model()
    saver = tf.train.Saver(max_to_keep=3)
    sess = tf.Session(config=utils.get_config())
    eval_dir = os.path.join(args.model_path, "eval")
    bestmodel_save_path = os.path.join(eval_dir, 'bestmodel')
    summary_writer = tf.summary.FileWriter(eval_dir)
    running_avg_loss = 0
    best_loss = None

    batch_cnt = 0
    while True:
        if ckpt_id != -1 and batch_cnt > 100:
            break
        batch_cnt += 1
        _ = utils.load_ckpt(args, saver, sess, "train", ckpt_id)
        batch = batcher.next_batch()

        # run eval on the batch
        t0 = time.time()
        results = model.run_step(sess, batch)
        t1 = time.time()
        tf.logging.info('seconds for batch: %.2f', t1 - t0)

        # print the loss and coverage loss to screen
        loss = results['loss']
        tf.logging.info('batch_id: %d (100)\tloss: %f', batch_cnt, loss)

        # add summaries
        summaries = results['summaries']
        train_step = results['global_step']
        summary_writer.add_summary(summaries, train_step)

        # calculate running avg loss
        running_avg_loss = calc_running_avg_loss(float(loss),
                                                 running_avg_loss,
                                                 summary_writer, train_step)

        # If running_avg_loss is best so far, save this checkpoint (early stopping).
        # These checkpoints will appear as bestmodel-<iteration_number> in the eval dir
        if best_loss is None or running_avg_loss < best_loss:
            tf.logging.info(
                'Found new best model with %.3f running_avg_loss. Saving to %s',
                running_avg_loss, bestmodel_save_path)
            saver.save(sess,
                       bestmodel_save_path,
                       global_step=train_step,
                       latest_filename='checkpoint_best')
            best_loss = running_avg_loss

        # flush the summary writer every so often
        if train_step % 100 == 0:
            summary_writer.flush()
Example #10
def train(model,data_loader_train,data_loader_test,optimizer,criterion_1,criterion_2,cfg,train_args,test_args):
    if train_args['load_ckpt'] is not None:
        load_ckpt(train_args,model)
    model.train()
    lr = train_args['lr']
    for epoch in range(train_args['epoch']):
        print('epoch #: %d'%epoch)
        for i,data in tqdm(enumerate(data_loader_train)):
            #target = data['B_bins'].squeeze().long()#.to(cfg['device'])
            
            output,pred_depth = model.train_nyuv2(data)
            output_softmax = output['b_fake_softmax']
            output_logit = output['b_fake_logit'].cpu()
            
#            weights = calc_weights(output_softmax.cpu(),target.clone().detach())

            loss_1 = criterion_1(output_logit,data['B_bins'].squeeze().long())
            loss_2 = criterion_2(imgrad_yx(pred_depth.cpu().clone()),data['E'].cpu())
            loss = loss_1 + loss_2
            optimizer.zero_grad()  # clear gradients before backprop, not after
            loss.backward()
            optimizer.step()
            lr = poly_lr_scheduler(optimizer,train_args['lr'],i + epoch*len(data_loader_train))
            if i%10 == 0:
                Img = vutils.make_grid(data['A'].data.cpu(),normalize = True,scale_each = True)
                GT_depth = vutils.make_grid(data['B'].data.cpu(),normalize = True,scale_each = True)
                Estimated_depth = vutils.make_grid(pred_depth.data.cpu(),normalize = True,scale_each = True)
                Edge = vutils.make_grid(data['E'].unsqueeze(1).repeat(1,3,1,1).data.cpu(),normalize = True,scale_each = True)
                inputs = vutils.make_grid((data['A']*data['E'].unsqueeze(1).repeat(1,3,1,1)).data.cpu(),normalize = True,scale_each = True) #x*e.repeat(1,3,1,1)
                writer.add_image('RGB',Img,i + epoch*len(data_loader_train))
                writer.add_image('GT_Depth',GT_depth,i + epoch*len(data_loader_train))
                writer.add_image('Predicted_Depth',Estimated_depth,i + epoch*len(data_loader_train))
                writer.add_image('Edge',Edge,i + epoch*len(data_loader_train))
                writer.add_image('inputs',inputs,i + epoch*len(data_loader_train))
            del output['b_fake_softmax'],output_softmax,output['b_fake_logit'],output_logit,pred_depth,loss
        print(lr)

        test(model,data_loader_test,cfg,test_args)
        save_ckpt(train_args['batchsize'],save_dir = '.',step = i + epoch*len(data_loader_train),epoch = epoch,model = model,optimizer = optimizer)
Example #11
def proc_func(infile, outfile, csv_path, csv_index):
    try:
        audio_analyzer = AudioAnalyzer(src_folder + infile)
        audio_analyzer.compute_features()
        feature = audio_analyzer.analyze()
        np.savez(dst_folder + outfile, **feature)
    except Exception:
        pass  # skip files whose audio features cannot be extracted

    [csv_old, index_old] = load_ckpt(ckpt_path).split('#')
    ckpt_index = str(max(csv_index, int(index_old))
                     if csv_path == csv_old else csv_index)
    ckpt_info = csv_path + '#' + ckpt_index
    save_ckpt(ckpt_info, ckpt_path)
Example #12
def task_syn(args):
        
    A, X = utils.load_XA(args.dataset, datadir = "../Generate_XA_Data/XAL")
    L = utils.load_labels(args.dataset, datadir = "../Generate_XA_Data/XAL")
    num_classes = max(L) + 1
    input_dim = X.shape[1]
    ckpt = utils.load_ckpt(args)

    print("input dim: ", input_dim, "; num classes: ", num_classes)
    
    model = models.GcnEncoderNode(
            input_dim=input_dim,
            hidden_dim=args.hidden_dim,
            embedding_dim=args.output_dim,
            label_dim=num_classes,
            num_layers=args.num_gc_layers,
            bn=args.bn,
            args=args,
        )
    
    model.load_state_dict(ckpt["model_state"]) 
    pred = ckpt["save_data"]["pred"]
    
    explainer = pe.Node_Explainer(model, A, X, pred, args.num_gc_layers)
    
    explanations = {}
    if args.explain_node is None:
        if args.dataset == 'syn1': 
            explanations = explainer.explain_range(list(range(300,700)), num_samples = args.num_perturb_samples, top_node = args.top_node)
        elif args.dataset == 'syn2': 
            explanations = explainer.explain_range(list(range(300,700)) + list(range(1000,1400)), num_samples = args.num_perturb_samples, top_node = args.top_node, pred_threshold = 0.1)
        elif args.dataset == 'syn3': 
            explanations = explainer.explain_range(list(range(300,1020)), num_samples = args.num_perturb_samples, top_node = args.top_node,pred_threshold = 0.05) 
        elif args.dataset == 'syn4': 
            explanations = explainer.explain_range(list(range(511,871)), num_samples = args.num_perturb_samples, top_node = args.top_node, pred_threshold = 0.1) 
        elif args.dataset == 'syn5': 
            explanations = explainer.explain_range(list(range(511,1231)), num_samples = args.num_perturb_samples, top_node = args.top_node, pred_threshold = 0.05)     
        elif args.dataset == 'syn6': 
            explanations = explainer.explain_range(list(range(300,700)), num_samples = args.num_perturb_samples, top_node = args.top_node)
    else:
        explanation = explainer.explain(args.explain_node, num_samples = args.num_perturb_samples, top_node = args.top_node)
        print(explanation)
        explanations[args.explain_node] = explanation
    
    
    print(explanations)
    
    savename = utils.gen_filesave(args)
    np.save(savename,explanations)
Example #13
    def __init__(self, model, batcher, vocab, ckpt_id=None, fw_sess=None, bw_model=None, bw_sess=None, bidi_ckpt_path=None):
        self.model = model
        self.batcher = batcher
        self.vocab = vocab
        self.sess = tf.Session(config=utils.gpu_config()) if fw_sess is None else fw_sess
        self.sess2 = bw_sess
        self.bw_model = bw_model

        if bw_model is None:
            ckpt_path = utils.load_ckpt(self.model.hps, self.model.saver, self.sess)
            print('Checkpoint path name: {}'.format(ckpt_path))
            ckpt_name = 'ckpt-' + ckpt_path.split('-')[-1]
        else:
            ckpt_name = 'ckpt-' + bidi_ckpt_path.split('-')[-1]
        self.decode_dir = os.path.join(model.hps.model_path, make_decode_dir_name(ckpt_name, model.hps))

        if not os.path.exists(self.decode_dir):
            os.makedirs(self.decode_dir)
Example #14
def convert_to_coverage_model():
    """Load non-coverage checkpoint, add initialized extra variables for
    coverage, and save as new checkpoint"""
    print("converting non-coverage model to coverage model..")

    # initialize an entire coverage model from scratch
    sess = tf.Session(config=utils.get_config())
    print("initializing everything...")
    sess.run(tf.global_variables_initializer())

    # load all non-coverage weights from checkpoint
    saver = tf.train.Saver([v for v in tf.global_variables() if "coverage" not in v.name and "Adagrad" not in v.name])
    print("restoring non-coverage variables...")
    curr_ckpt = utils.load_ckpt(saver, sess)
    print("restored.")

    # save this model and quit
    new_fname = curr_ckpt + '_cov_init'
    print("saving model to %s..." % (new_fname))
    new_saver = tf.train.Saver()
    # this one will save all variables that now exist
    new_saver.save(sess, new_fname)
    print("saved.")
    exit()
Example #15
def main():
    print("Hello!")
    voca = Vocab(args.vocab_fname)
    model = Model(args, voca)
    batcher = Batcher(voca, args)

    with tf.Session(config=GPU_config()) as sess:
        model.build_graph()

        if args.mode == 'train':
            sess.run(tf.global_variables_initializer())
            if not os.path.exists(args.train_logdir):
                os.makedirs(args.train_logdir)
            if not os.path.exists(args.valid_logdir):
                os.makedirs(args.valid_logdir)
            train_writer = tf.summary.FileWriter(args.train_logdir, sess.graph)
            valid_writer = tf.summary.FileWriter(args.valid_logdir, sess.graph)

            t = trange(args.max_step, leave=True)
            for i in t:
                sample, label = batcher.next_data()
                _, loss, step, summaries = model.run_train_step(sample, sess)
                t.set_description('Train loss: {}'.format(round(loss, 3)))
                train_writer.add_summary(summaries, step)

                if step % 5e3 == 0:
                    model.saver.save(sess, args.model_path, step)

                if step % 5 == 0:
                    valid_sample, valid_label = batcher.next_data(
                        is_valid=True)
                    loss, step, summaries = model.run_eval_step(
                        valid_sample, sess)
                    valid_writer.add_summary(summaries, step)
                    t.set_description('Valid loss: {}'.format(round(loss, 3)))

                if step % 100 == 0:
                    near_ids, near_words = model.get_nearest_words(
                        sess, args.near_K)
                    pprint(near_words)
                    score = coherence_score(args.test_bin_fname, voca,
                                            near_ids)
                    summary = tf.Summary()
                    summary.value.add(tag='coherence_score_{}k'.format(
                        args.near_K),
                                      simple_value=score)
                    valid_writer.add_summary(summary, step)

        else:
            load_ckpt(args.model_path, sess, model.saver)
            near_words_dict = {i: [] for i in range(args.aspect_num)}
            for k in range(5, 50, 5):
                near_ids, near_words = model.get_nearest_words(sess, k)
                score = coherence_score(args.test_bin_fname, voca, near_ids)
                print(k, score)
                for asp_idx in near_words:
                    for word in near_words[asp_idx]:
                        if word not in near_words_dict[asp_idx]:
                            near_words_dict[asp_idx].append(word)

            with open(args.nearword_fname, 'w') as f:
                for idx in range(len(list(near_words_dict.keys()))):
                    print(near_words_dict[idx])
                    f.write(str(idx) + '   ')
                    f.write(' '.join(near_words_dict[idx][:5]))
                    f.write('\n')
Example #16
def main():
    start_time = time()
    last_step = get_last_ckpt_step()
    assert last_step >= 0
    my_log(f'Checkpoint found: {last_step}\n')
    print_args()

    net_init, net_apply, net_init_cache, net_apply_fast = get_net()

    params = load_ckpt(last_step)
    in_shape = (args.batch_size, args.L, args.L, 1)
    _, cache_init = net_init_cache(params, jnp.zeros(in_shape), (-1, -1))

    # sample_raw_fun = get_sample_fun(net_apply, None)
    sample_raw_fun = get_sample_fun(net_apply_fast, cache_init)
    # sample_k_fun = get_sample_k_fun(net_apply, None)
    sample_k_fun = get_sample_k_fun(net_apply_fast, net_init_cache)
    log_q_fun = get_log_q_fun(net_apply)

    @jit
    def update(spins_old, log_q_old, energy_old, step, accept_count,
               energy_mean, energy_var_sum, rng):
        rng, rng_k, rng_sample, rng_accept = jrand.split(rng, 4)
        k = get_k(rng_k)
        spins = sample_k_fun(k, params, spins_old, rng_sample)
        log_q = log_q_fun(params, spins)
        energy = energy_fun(spins)

        log_uniform = jnp.log(jrand.uniform(rng_accept, (args.batch_size, )))
        accept = log_uniform < (log_q_old - log_q + args.beta *
                                (energy_old - energy))

        spins = jnp.where(jnp.expand_dims(accept, axis=(1, 2, 3)), spins,
                          spins_old)
        log_q = jnp.where(accept, log_q, log_q_old)
        energy = jnp.where(accept, energy, energy_old)
        mag = spins.mean(axis=(1, 2, 3))

        step += 1
        accept_count += accept.sum()
        energy_per_spin = energy / args.L**2
        energy_mean, energy_var_sum = welford_update(energy_per_spin.mean(),
                                                     step, energy_mean,
                                                     energy_var_sum)

        return (spins, log_q, energy, mag, accept, k, step, accept_count,
                energy_mean, energy_var_sum, rng)

    rng, rng_init = jrand.split(jrand.PRNGKey(args.seed))
    # Sample initial configurations from the network
    spins = sample_raw_fun(args.batch_size, params, rng_init)
    log_q = log_q_fun(params, spins)
    energy = energy_fun(spins)

    step = 0
    accept_count = 0
    energy_mean = 0
    energy_var_sum = 0

    data_filename = args.log_filename.replace('.log', '.hdf5')
    writer_proto = [
        # Uncomment to save all the sampled spins
        # ('spins', bool, (args.batch_size, args.L, args.L)),
        ('log_q', np.float32, (args.batch_size, )),
        ('energy', np.int32, (args.batch_size, )),
        ('mag', np.float32, (args.batch_size, )),
        ('accept', bool, (args.batch_size, )),
        ('k', np.int32, None),
    ]
    ensure_dir(data_filename)
    with ChunkedDataWriter(data_filename, writer_proto,
                           args.save_step) as writer:
        my_log('Sampling...')
        while step < args.max_step:
            (spins, log_q, energy, mag, accept, k, step, accept_count,
             energy_mean, energy_var_sum,
             rng) = update(spins, log_q, energy, step, accept_count,
                           energy_mean, energy_var_sum, rng)
            # Uncomment to save all the sampled spins
            # writer.write(spins[:, :, :, 0] > 0, log_q, energy, mag, accept, k)
            writer.write(log_q, energy, mag, accept, k)

            if args.print_step and step % args.print_step == 0:
                accept_rate = accept_count / (step * args.batch_size)
                energy_std = jnp.sqrt(energy_var_sum / step)
                my_log(', '.join([
                    f'step = {step}',
                    f'P = {accept_rate:.8g}',
                    f'E = {energy_mean:.8g}',
                    f'E_std = {energy_std:.8g}',
                    f'time = {time() - start_time:.3f}',
                ]))
Example #17
import configs
import utils

prog_args = configs.arg_parse()
ckpt = utils.load_ckpt(prog_args)

save_data = ckpt["save_data"] # get save data

print(ckpt["epoch"])
Example #18
def main():

    utils.print_config(args)

    if 'train' not in args.mode:
        args.keep_rate = 1.0
    args.use_pretrain = (args.use_pretrain == 'True')
    args.use_aux_task = (args.use_aux_task == 'True')

    if args.mode == 'lm_train':
        args.model = 'lm'
        args.data_path = "./data/wikitext/wikitext-103/processed_wiki_train.bin"
        args.use_pretrain = False

    args.model_path = os.path.join(args.model_path, args.exp_name).format(
        args.model)  # model_path default: "data/log/{}"

    if not os.path.exists(args.model_path):
        if 'train' not in args.mode:
            print(args.model_path)
            raise ValueError
        os.makedirs(args.model_path)
    with open(os.path.join(args.model_path, 'config.json'),
              'w',
              encoding='utf8') as f:
        json.dump(vars(args), f)

    print("Default models path: {}".format(args.model_path))

    print('code start/ {} mode / {} models'.format(args.mode, args.model))
    utils.assign_specific_gpu(args.gpu_nums)

    vocab = utils.Vocab()

    vardicts = utils.get_pretrain_weights(
        args.true_pretrain_ckpt_path
    ) if args.use_pretrain and args.mode == 'train' else None

    if args.mode == 'decode':
        if args.model == 'mmi_bidi': args.beam_size = args.mmi_bsize
        args.batch_size = args.beam_size

    modelhps = deepcopy(args)
    if modelhps.mode == 'decode':
        modelhps.max_dec_len = 1

    if args.model == 'vanilla':
        model = BaseModel(vocab, modelhps)
    elif args.model == 'mmi_bidi':
        if args.mode == 'decode':
            bw_graph = tf.Graph()
            with bw_graph.as_default():
                bw_model = BaseModel(vocab, args)

            bw_sess = tf.Session(graph=bw_graph, config=utils.gpu_config())

            with bw_sess.as_default():
                with bw_graph.as_default():
                    bidi_ckpt_path = utils.load_ckpt(bw_model.hps,
                                                     bw_model.saver, bw_sess)

            fw_graph = tf.Graph()
            with fw_graph.as_default():
                modelhps.model_path = modelhps.model_path.replace(
                    'mmi_bidi', 'vanilla')
                modelhps.model = 'vanilla'
                fw_model = BaseModel(vocab, modelhps)
            fw_sess = tf.Session(graph=fw_graph)
            with fw_sess.as_default():
                with fw_graph.as_default():
                    ckpt_path = utils.load_ckpt(fw_model.hps, fw_model.saver,
                                                fw_sess)
        else:
            model = BaseModel(vocab, modelhps)

    elif args.model == 'lm':
        model = LMModel(vocab, modelhps)
    elif args.model == 'embmin':
        model = DiverEmbMin(vocab, modelhps)
    else:
        raise ValueError
    print('models load end')

    if args.mode in ['train', 'lm_train']:
        train(model, vocab, vardicts)
    elif args.mode == 'decode':
        import time

        if args.model == 'mmi_bidi':
            batcher = Batcher(
                vocab, bw_model.hps.data_path.replace('train_', 'test_'), args)
            decoder = BeamsearchDecoder(fw_model,
                                        batcher,
                                        vocab,
                                        fw_sess=fw_sess,
                                        bw_model=bw_model,
                                        bw_sess=bw_sess,
                                        bidi_ckpt_path=bidi_ckpt_path)
        else:
            batcher = Batcher(vocab,
                              model.hps.data_path.replace('train_', 'test_'),
                              args)
            decoder = BeamsearchDecoder(model, batcher, vocab)
        decoder.decode()
    elif args.mode == 'eval':
        pass
Example #19
                 n_views=args.n_views, depth_interval=args.depth_interval,
                 img_wh=tuple(args.img_wh))

    if args.scan:
        scans = [args.scan]
    else: # evaluate on all scans in dataset
        scans = dataset.scans

    # Step 1. Create depth estimation and probability for each scan
    model = CascadeMVSNet(n_depths=args.n_depths,
                          interval_ratios=args.interval_ratios,
                          num_groups=args.num_groups,
                          norm_act=ABN)
    device = 'cpu' if args.cpu else 'cuda:0'
    model.to(device)
    load_ckpt(model, args.ckpt_path)
    model.eval()

    depth_dir = f'results/{args.dataset_name}/depth'
    print('Creating depth and confidence predictions...')
    if args.scan: # TODO: adapt scan specification to tanks and blendedmvs
        data_range = [i for i, x in enumerate(dataset.metas) if x[0] == args.scan]
    else:
        data_range = range(len(dataset))
    for i in tqdm(data_range):
        imgs, proj_mats, init_depth_min, depth_interval, \
            scan, vid = decode_batch(dataset[i])
        
        os.makedirs(os.path.join(depth_dir, scan), exist_ok=True)

        with torch.no_grad():
Example #20

if __name__ == "__main__":
    args = get_opts()
    w, h = args.img_wh

    kwargs = {'root_dir': args.root_dir, 'img_wh': tuple(args.img_wh)}
    if args.dataset_name == 'llff':
        kwargs['spheric_poses'] = args.spheric_poses
    dataset = dataset_dict[args.dataset_name](split='test', **kwargs)

    embedding_xyz = Embedding(3, 10)
    embedding_dir = Embedding(3, 4)
    nerf_coarse = NeRF()
    nerf_fine = NeRF()
    load_ckpt(nerf_coarse, args.ckpt_path, model_name='nerf_coarse')
    load_ckpt(nerf_fine, args.ckpt_path, model_name='nerf_fine')
    nerf_coarse.cuda().eval()
    nerf_fine.cuda().eval()

    models = [nerf_coarse, nerf_fine]
    embeddings = [embedding_xyz, embedding_dir]

    imgs = []
    psnrs = []
    dir_name = f'results/{args.dataset_name}/{args.scene_name}'
    os.makedirs(dir_name, exist_ok=True)

    for i in tqdm(range(len(dataset))):
        sample = dataset[i]
        rays = sample['rays'].cuda()
Example #21
def main():
    global args, best_prec1
    args = parser.parse_args()

    with open(args.config) as f:
        config = yaml.safe_load(f)

    for k, v in config['common'].items():
        setattr(args, k, v)
    print(args.eval_list)

    if not (args.model_path and os.path.isfile(args.model_path)):
        print("=> no checkpoint found at '{}'".format(args.model_path))
        return

    gpu_num = torch.cuda.device_count()

    if args.distributed:
        args.rank, args.size = init_processes(args.dist_addr, args.dist_port,
                                              gpu_num, args.dist_backend)
        print("=> using {} GPUS for distributed training".format(args.size))
    else:
        args.rank = 0
        print("=> using {} GPUS for training".format(gpu_num))

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = model_zoo[args.arch](num_classes=args.num_classes)

    if not args.distributed:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model, [args.rank])
        print('create DistributedDataParallel model successfully', args.rank)

    if args.rank == 0:
        mkdir_if_no_exist(args.save_path,
                          subdirs=['events/', 'logs/', 'checkpoints/'])
        logger = create_logger('global_logger',
                               '{}/logs/log.txt'.format(args.save_path))
        logger.debug(args)  # log args only to file
    else:
        logger = None

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    load_ckpt(args.model_path, model)

    cudnn.benchmark = True

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    val_loader = torch.utils.data.DataLoader(
        FileListDatasetName(
            args.eval_list,
            args.eval_root,
            transforms.Compose([
                transforms.Resize(args.image_size),
                transforms.CenterCrop(args.input_size),
                #transforms.Resize((args.input_size, args.input_size)),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=args.test_batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    validate(val_loader, model, criterion, logger, args.print_freq, args.rank)
Example #22
    img_path = img_folder + infile.split('.')[0] + '/'
    video2frames(src_folder + infile, img_path)
    feature = OpticalFlowAnalyzer(img_path).analyze()
    np.savez(dst_folder + outfile, feature)

    [csv_old, index_old] = load_ckpt(ckpt_path).split('#')
    ckpt_index = str(
        max(csv_index, int(index_old)) if csv_path == csv_old else csv_index)
    ckpt_info = csv_path + '#' + ckpt_index
    save_ckpt(ckpt_info, ckpt_path)


if __name__ == "__main__":
    check_path(ckpt_path, binary=True)

    ckpt_info = load_ckpt(ckpt_path)
    if ckpt_info is None or '#' not in ckpt_info:
        ckpt_chunk = csv_paths[0]
        ckpt_index = -1
        save_ckpt(ckpt_chunk + '#' + str(ckpt_index), ckpt_path)
    else:
        ckpt_chunk = ckpt_info.split('#')[0]
        ckpt_index = int(ckpt_info.split('#')[1])

    print('continue from checkpoint ' + ckpt_chunk + ' ' + str(ckpt_index))

    for csv_path in csv_paths:
        print(csv_path + ' has begun ...')
        csv_file = csv.reader(open(csv_folder + csv_path + '.csv'))
        _ = next(csv_file)
        rows = [row for row in csv_file]
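Examples #7, #11 and #22 keep their progress in a plain-text checkpoint of the form 'csv_path#index'. A minimal sketch of the load_ckpt/save_ckpt pair they assume follows; the original scripts may handle the file slightly differently, so this is only an illustration of the format.

import os

def load_ckpt(ckpt_path):
    # Return the stored 'csv_path#index' string, or None if nothing was saved yet.
    if not os.path.exists(ckpt_path):
        return None
    with open(ckpt_path) as f:
        content = f.read().strip()
    return content if content else None

def save_ckpt(ckpt_info, ckpt_path):
    # Overwrite the checkpoint file with the latest 'csv_path#index' string.
    with open(ckpt_path, 'w') as f:
        f.write(ckpt_info)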
Example #23
def main(args):

    args.color_t = torch.rand(700, 3)

    if not os.path.exists(args.ckpt_dir):
        os.mkdir(args.ckpt_dir)
    if not os.path.exists(args.summary_dir):
        os.mkdir(args.summary_dir)

    device = torch.device(
        "cuda" if not args.nocuda and torch.cuda.is_available() else "cpu")

    train_data = TrainStation(args=args, train=True)

    train_loader = DataLoader(train_data,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.workers,
                              drop_last=True)

    num_train = len(train_data)

    model = SCALOR(args)
    model.to(device)
    model.train()

    optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr)

    global_step = 0

    if args.last_ckpt:
        global_step, args.start_epoch = \
            load_ckpt(model, optimizer, args.last_ckpt, device)

    writer = SummaryWriter(args.summary_dir)

    args.global_step = global_step

    log_tau_gamma = np.log(args.tau_end) / args.tau_ep

    for epoch in range(int(args.start_epoch), args.epochs):
        local_count = 0
        last_count = 0
        end_time = time.time()

        for batch_idx, (sample, counting_gt) in enumerate(train_loader):

            tau = np.exp(global_step * log_tau_gamma)
            tau = max(tau, args.tau_end)
            args.tau = tau

            global_step += 1

            log_phase = global_step % args.print_freq == 0 or global_step == 1
            args.global_step = global_step
            args.log_phase = log_phase

            imgs = sample.to(device)

            y_seq, log_like, kl_z_what, kl_z_where, kl_z_depth, \
            kl_z_pres, kl_z_bg, log_imp, counting, \
            log_disc_list, log_prop_list, scalor_log_list = model(imgs)

            log_like = log_like.mean(dim=0)
            kl_z_what = kl_z_what.mean(dim=0)
            kl_z_where = kl_z_where.mean(dim=0)
            kl_z_depth = kl_z_depth.mean(dim=0)
            kl_z_pres = kl_z_pres.mean(dim=0)
            kl_z_bg = kl_z_bg.mean(0)

            total_loss = -(log_like - kl_z_what - kl_z_where - kl_z_depth -
                           kl_z_pres - kl_z_bg)

            optimizer.zero_grad()
            total_loss.backward()

            clip_grad_norm_(model.parameters(), args.cp)
            optimizer.step()

            local_count += imgs.data.shape[0]

            if log_phase:

                time_inter = time.time() - end_time
                end_time = time.time()

                count_inter = local_count - last_count

                print_scalor(global_step, epoch, local_count, count_inter,
                             num_train, total_loss, log_like, kl_z_what,
                             kl_z_where, kl_z_pres, kl_z_depth, time_inter)

                writer.add_scalar('train/total_loss',
                                  total_loss.item(),
                                  global_step=global_step)
                writer.add_scalar('train/log_like',
                                  log_like.item(),
                                  global_step=global_step)
                writer.add_scalar('train/What_KL',
                                  kl_z_what.item(),
                                  global_step=global_step)
                writer.add_scalar('train/Where_KL',
                                  kl_z_where.item(),
                                  global_step=global_step)
                writer.add_scalar('train/Pres_KL',
                                  kl_z_pres.item(),
                                  global_step=global_step)
                writer.add_scalar('train/Depth_KL',
                                  kl_z_depth.item(),
                                  global_step=global_step)
                writer.add_scalar('train/Bg_KL',
                                  kl_z_bg.item(),
                                  global_step=global_step)
                # writer.add_scalar('train/Bg_alpha_KL', kl_z_bg_mask.item(), global_step=global_step)
                writer.add_scalar('train/tau', tau, global_step=global_step)

                log_summary(args,
                            writer,
                            imgs,
                            y_seq,
                            global_step,
                            log_disc_list,
                            log_prop_list,
                            scalor_log_list,
                            prefix='train')

                last_count = local_count

            if global_step % args.generate_freq == 0:
                ####################################### do generation ####################################
                model.eval()
                with torch.no_grad():
                    args.phase_generate = True
                    y_seq, log_like, kl_z_what, kl_z_where, kl_z_depth, \
                    kl_z_pres, kl_z_bg, log_imp, counting, \
                    log_disc_list, log_prop_list, scalor_log_list = model(imgs)
                    args.phase_generate = False
                    log_summary(args,
                                writer,
                                imgs,
                                y_seq,
                                global_step,
                                log_disc_list,
                                log_prop_list,
                                scalor_log_list,
                                prefix='generate')
                model.train()
                ####################################### end generation ####################################

            if global_step % args.save_epoch_freq == 0 or global_step == 1:
                save_ckpt(args.ckpt_dir, model, optimizer, global_step, epoch,
                          local_count, args.batch_size, num_train)
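Example #23 resumes training with load_ckpt(model, optimizer, args.last_ckpt, device) and periodically calls save_ckpt(...). A minimal PyTorch sketch of this pattern is given below; the checkpoint keys and the file naming are assumptions for illustration, not the original helpers.

import os
import torch

def save_ckpt(ckpt_dir, model, optimizer, global_step, epoch,
              local_count, batch_size, num_train):
    # Persist model/optimizer state together with the training progress counters.
    state = {
        'global_step': global_step,
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }
    torch.save(state, os.path.join(ckpt_dir, 'ckpt_{:08d}.pth'.format(global_step)))

def load_ckpt(model, optimizer, ckpt_path, device):
    # Restore model/optimizer state and return (global_step, start_epoch).
    ckpt = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(ckpt['model_state_dict'])
    optimizer.load_state_dict(ckpt['optimizer_state_dict'])
    return ckpt['global_step'], ckpt['epoch']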
Example #24
def main(**kwargs):
    # parse parameters
    param = default_config()
    param.update({
        "mode": "sds",
        "top_k": 10,
        "ckpt": "ckpt/gnn.pt",
        "use_gpu": False
    })

    param.update(kwargs)

    # read maps
    symp2id, id2symp = read_symp2id()
    dise2id, id2dise = read_dise2id()

    # read data
    datapath = os.path.join("dataset/EHR/test/data.txt")
    fin = open(datapath, "r", encoding="utf-8")
    lines = fin.readlines()

    data_model = ehr.EHR("dataset/EHR", "train")

    # init retrieval system
    ehr_ret = EHR_retrieval(mode=param["mode"])

    # init and load model
    data_model_param = parse_data_model(data_model)
    param.update(data_model_param)
    param = parse_kwargs(param, kwargs)
    gnn = HGNN(**param)

    if param["use_gpu"]:
        gnn.cuda()

    ckpt_path = param.get("ckpt")
    if ckpt_path is None:
        print("[Warning] Do not set ckpt path, load from the default path.")
        load_ckpt("ckpt/checkpoint.pt", gnn, param["use_gpu"])
    else:
        load_ckpt(ckpt_path, gnn, param["use_gpu"])

    dsd_sampler = DSD_sampler("dataset/EHR")
    usu_sampler = USU_sampler("dataset/EHR")

    gnn.eval()

    emb_dise = gnn.gen_all_dise_emb(dsd_sampler)

    # init result list
    before_list = []
    after_list = []
    real_dise_list = []
    init_symp_list = []
    after_symp_list = []

    result_map_bfo = defaultdict(list)
    result_map_aft = defaultdict(list)
    # this is top_k for evaluation p@N, Rec@N, ...
    top_k_list = [1, 5]

    for i, line in enumerate(lines):
        line_data = line.strip().split()
        uid = line_data[0]
        did = line_data[1]
        real_dise_list.append(did)
        symps = line_data[2:]

        # select the first symptom and do inference
        init_symp = symps[0]
        init_symp_list.append(id2symp[init_symp])

        symp_ar = np.array([[init_symp]])

        pred_rank = gnn.rank_query(symp_ar, emb_dise, usu_sampler, top_k=5)

        # calculate statistics
        for top_k in top_k_list:
            pred_top_k = pred_rank[0][:top_k]
            calculate_rec_ndcg(pred_top_k, int(did), top_k, result_map_bfo)

        # print("true did:", did)
        # print("before:", pred_rank)
        before_list.append(pred_rank[0])

        rank_symp = ehr_ret(symp_idx=init_symp, top_k=param["top_k"])
        after_symp_list.append([id2symp[str(t)] for t in rank_symp])
        symp_ar = [np.concatenate([[init_symp], rank_symp], 0)]

        # symp_ar = np.array([symps])
        pred_rank = gnn.rank_query(symp_ar, emb_dise, usu_sampler, top_k=5)
        for top_k in top_k_list:
            pred_top_k = pred_rank[0][:top_k]
            calculate_rec_ndcg(pred_top_k, int(did), top_k, result_map_aft)

        # print("after:", pred_rank)
        after_list.append(pred_rank[0])

        ret_symps = ehr_ret(init_symp, param["top_k"])
        ret_symp_list = []
        for sid in ret_symps:
            ret_symp_list.append(id2symp[str(sid)])

        if i % 100 == 0:
            print("[line]:", i)

    # summary
    bf_log = build_result_log(result_map_bfo, top_k_list)
    af_log = build_result_log(result_map_aft, top_k_list)

    print("[before]: {}".format(bf_log))
    print("[after]: {}".format(af_log))

    # to result csv
    fout = open("retrieval_result_{}.txt".format(param["mode"]),
                "w",
                encoding="utf-8")
    fout.write("did\tbefore_pred\tafter_pred\tinit_symp\taftersymp\n")
    for i in range(len(init_symp_list)):
        wrtline = id2dise[int(real_dise_list[i])] + "\t" + id2dise[int(
            before_list[i][0])] + "\t" + id2dise[int(
                after_list[i]
                [0])] + "\t" + init_symp_list[i] + "\t" + "#".join(
                    after_symp_list[i]) + "\n"
        fout.write(wrtline)

    fin.close()
    fout.close()

    df_res = pd.read_table("retrieval_result_{}.txt".format(param["mode"]))
    df_res.to_excel("retrieval_result_{}.xlsx".format(param["mode"]),
                    encoding="utf-8")
    print("Done")
Example #25
    print('===> prepare model ...')
    unet = models.PConvUNet()
    unet = unet.cuda()
    discriminator = models.Discriminator(in_channels=3)
    discriminator = discriminator.cuda()

    if args.finetune:
        unet.freeze_enc_bn = True  # freeze bn layer for fine tuning
        optimizer = torch.optim.Adam(unet.parameters(), lr=args.lr_finetune)
    else:
        optimizer = torch.optim.Adam(unet.parameters(), lr=args.lr)

    if args.resume:
        # start_iter = utils.load_ckpt(args.resume_folder, [('model', unet)])
        utils.load_ckpt(args.resume_folder, [('model', unet)])
        # unet.load_state_dict(torch.load(args.resume_folder))

    print('===> prepare loss function ...')
    criterion = loss.InpaintingLoss(models.VGG16FeatureExtractor()).cuda()

    print('===> prepare lambda ...')
    LAMBDA_DICT = {
        'valid': 1.0,
        'hole': 6.0,
        'tv': 0.1,
        'prc': 0.05,
        'style': 120.0
    }

    print('===> start training ...')
Example #26
def main():
    start_time = time()
    init_out_dir()
    last_step = get_last_ckpt_step()
    if last_step >= 0:
        my_log(f'\nCheckpoint found: {last_step}\n')
    else:
        clear_log()
    print_args()

    net_init, net_apply, net_init_cache, net_apply_fast = get_net()

    rng, rng_net = jrand.split(jrand.PRNGKey(args.seed))
    in_shape = (args.batch_size, args.L, args.L, 1)
    out_shape, params_init = net_init(rng_net, in_shape)

    _, cache_init = net_init_cache(params_init, jnp.zeros(in_shape), (-1, -1))

    # sample_fun = get_sample_fun(net_apply, None)
    sample_fun = get_sample_fun(net_apply_fast, cache_init)
    log_q_fun = get_log_q_fun(net_apply)

    need_beta_anneal = args.beta_anneal_step > 0

    opt_init, opt_update, get_params = optimizers.adam(args.lr)

    @jit
    def update(step, opt_state, rng):
        params = get_params(opt_state)
        rng, rng_sample = jrand.split(rng)
        spins = sample_fun(args.batch_size, params, rng_sample)
        log_q = log_q_fun(params, spins) / args.L**2
        energy = energy_fun(spins) / args.L**2

        def neg_log_Z_fun(params, spins):
            log_q = log_q_fun(params, spins) / args.L**2
            energy = energy_fun(spins) / args.L**2
            beta = args.beta
            if need_beta_anneal:
                beta *= jnp.minimum(step / args.beta_anneal_step, 1)
            neg_log_Z = log_q + beta * energy
            return neg_log_Z

        loss_fun = partial(expect,
                           log_q_fun,
                           neg_log_Z_fun,
                           mean_grad_expected_is_zero=True)
        grads = grad(loss_fun)(params, spins, spins)
        opt_state = opt_update(step, grads, opt_state)

        return spins, log_q, energy, opt_state, rng

    if last_step >= 0:
        params_init = load_ckpt(last_step)

    opt_state = opt_init(params_init)

    my_log('Training...')
    for step in range(last_step + 1, args.max_step + 1):
        spins, log_q, energy, opt_state, rng = update(step, opt_state, rng)

        if args.print_step and step % args.print_step == 0:
            # Use the final beta, not the annealed beta
            free_energy = log_q / args.beta + energy
            my_log(', '.join([
                f'step = {step}',
                f'F = {free_energy.mean():.8g}',
                f'F_std = {free_energy.std():.8g}',
                f'S = {-log_q.mean():.8g}',
                f'E = {energy.mean():.8g}',
                f'time = {time() - start_time:.3f}',
            ]))

        if args.save_step and step % args.save_step == 0:
            params = get_params(opt_state)
            save_ckpt(params, step)
Example #27
def main(args):
    model = Model()

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=args.step_size,
                                                gamma=args.gamma)

    if args.scheduler == 'multistep':
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                         args.milestones,
                                                         gamma=args.gamma)
    elif args.scheduler == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, args.step_size)

    criterion = torch.nn.CrossEntropyLoss()

    model = model.cuda()
    criterion = criterion.cuda()

    start_epoch = 0

    # Check the number of parameters in your model
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print(f"Number of parameters: {pytorch_total_params}")

    if not os.path.exists(args.savepath):
        os.makedirs(args.savepath)

    # resume
    if args.resume:
        model, optimizer, start_epoch = load_ckpt(model, optimizer, args)

    # Dataloader
    if args.dataset == 'cifar10':
        normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                         std=[0.2023, 0.1994, 0.2010])
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_train.transforms.insert(
            0, RandAugment(args.rand_n, args.rand_m))
        transform_val = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
        trainset = CIFAR10(root=args.datapath,
                           train=True,
                           download=True,
                           transform=transform_train)
        valset = CIFAR10(root=args.datapath,
                         train=False,
                         download=True,
                         transform=transform_val)
    elif args.dataset == 'cifar100':
        normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                         std=[0.2023, 0.1994, 0.2010])
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_val = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
        trainset = CIFAR100(root=args.datapath,
                            train=True,
                            download=True,
                            transform=transform_train)
        valset = CIFAR100(root=args.datapath,
                          train=False,
                          download=True,
                          transform=transform_val)
    elif args.dataset == 'ImageNet':
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(image_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_val = transforms.Compose([
            transforms.Resize(image_size + 32),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            normalize,
        ])
        trainset = ImageNet(root=args.datapath,
                            split='train',
                            download=False,
                            transform=transform_train)
        valset = ImageNet(root=args.datapath,
                          split='val',
                          download=False,
                          transform=transform_val)
    elif args.dataset == 'tiny-imagenet-200':
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(image_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_val = transforms.Compose([
            transforms.Resize(image_size + 32),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            normalize,
        ])
        # torchvision's ImageFolder takes no split/download arguments;
        # point it at the train/val subfolders instead
        trainset = ImageFolder(root=os.path.join(args.datapath, 'train'),
                               transform=transform_train)
        valset = ImageFolder(root=os.path.join(args.datapath, 'val'),
                             transform=transform_val)

    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers,
                                               pin_memory=False)
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers,
                                             pin_memory=False)

    # start training
    last_top1_acc = 0
    acc1_valid = 0
    best_acc1 = 0
    is_best = False
    for epoch in range(start_epoch, args.epochs):
        print("\n----- epoch: {}, lr: {} -----".format(
            epoch, optimizer.param_groups[0]["lr"]))

        # train for one epoch
        start_time = time.time()
        last_top1_acc = train(train_loader, epoch, model, optimizer, criterion)
        elapsed_time = time.time() - start_time
        print('==> {:.2f} seconds to train this epoch\n'.format(elapsed_time))

        # validate for one epoch
        start_time = time.time()
        acc1_valid = validate(val_loader, model, criterion)
        elapsed_time = time.time() - start_time
        print(
            '==> {:.2f} seconds to validate this epoch\n'.format(elapsed_time))

        # learning rate scheduling
        scheduler.step()

        summary = [epoch, last_top1_acc, acc1_valid.item()]

        is_best = acc1_valid > best_acc1
        best_acc1 = max(acc1_valid, best_acc1)

        save_summary('rexnetv1', args.dataset, args.name, summary)

        checkpoint = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }

        save_ckpt(checkpoint, is_best, args)

        #if is_best:
        #  torch.save(model.state_dict(), args.savepath+'model_weight_best.pth')

        # Save model each epoch
        #torch.save(model.state_dict(), args.savepath+'model_weight_epoch{}.pth'.format(epoch))

    print(f"Last Top-1 Accuracy: {last_top1_acc}")
    print(f"Best valid Top-1 Accuracy: {best_acc1}")
    print(f"Number of parameters: {pytorch_total_params}")
Example No. 28
def main(args, stream):
    if not args.class_wise_sampling and args.data_seed is None:
        args.data_seed = random.randint(1, int(1e8))

    # * Prepare data_module *
    dm = DInterface(**vars(args))
    args.class_dict = dm.init_data['class_dict']
    args.classes = list(args.class_dict.keys())
    args.num_classes = len(args.class_dict)
    global_bs = args.gpus * args.batch_size if args.gpus > 1 else args.batch_size

    # * Build model *
    net = build_model(**vars(args))

    if args.load_pretrained:
        pretrained_path = load_pretrain_path_by_args(args, '.pth.tar')
        bl_layers = None
        if args.mode_name in ['train', 'finetune']:
            bl_layers = ['classifier', 'fc']
        net = load_ckpt(net, pretrained_path,
                        train=(args.mode_name == 'train'),
                        block_layers=bl_layers,
                        map_keys=args.map_keys,
                        verbose=True)

    model = BasicModel(net, **vars(args))

    # Resume
    load_path = load_model_path_by_args(args)
    if load_path is not None:
        # load_from_checkpoint returns a new instance, so capture the result
        model = model.load_from_checkpoint(checkpoint_path=load_path, strict=False)

    # * validate mode *
    if args.mode_name in ['val', 'test']:
        model.final_val = True
        trainer = Trainer.from_argparse_args(args)
        trainer.validate(model, datamodule=dm)
        return

    # * Callbacks *
    # Checkpoint callbacks
    if args.ckpt == 'debug' or not args.save_ckpt:
        # ckpt_callback = get_checkpoint_callback(args.ckpt, save_last=False, save_top_k=0)
        ckpt_callback = get_checkpoint_callback(f'Task_models/{args.net_suffix}', save_last=False, save_top_k=1)
    else:
        cpDir = '{}/{}_{}'.format(args.ckpt, args.model_name, args.net_suffix)
        every_n_train_steps = dm.num_samples//global_bs
        if args.ckpt_ever_n_epoch:
            every_n_train_steps *= args.ckpt_ever_n_epoch
        ckpt_callback = get_checkpoint_callback(
            cpDir, 'val/acc', 'max',
            filename='{epoch}_{val_acc:.2f}',
            every_n_train_steps=every_n_train_steps)

    # Logging callbacks
    if args.train_scale >= 1:
        version_str = f'{args.dataset}_ts={int(args.train_scale):d}'
    else:
        version_str = f'{args.dataset}_ts={args.train_scale:.2%}'
    logger_tb = pl_log.TensorBoardLogger(args.log_dir, args.exp_name, version_str)
    log_dir = logger_tb.log_dir
    args.logger = [logger_tb]
    if pl.utilities.distributed._get_rank() == 0:
        os.makedirs(log_dir, exist_ok=True)
        stream.all_to_file(log_dir+'/{}.log'.format(
            args.exp_name), flush=True)

    # logger_eren = MyLogger(log_dir, 'exp_log')
    logger_eren = MyLogger(None)
    args.progress_bar_refresh_rate = 0  # Use MyLogger() instead of the progress bar
    lr_monitor = pl.callbacks.LearningRateMonitor(logging_interval='epoch')
    args.callbacks = [
        ckpt_callback, logger_eren, lr_monitor
    ]

    # * Accelerating *
    if args.gpus > 1 and (args.accelerator is None and args.plugins is None):
        args.accelerator = 'ddp'
    if args.accelerator == 'ddp':
        args.plugins = pl.plugins.DDPPlugin(find_unused_parameters=False)

    if args.mode_name in ['train', 'finetune']:
        args.benchmark = True

    # * Begin training and testing *
    trainer = Trainer.from_argparse_args(args)

    # Begin training
    trainer.fit(model, datamodule=dm)

    # Final test
    model.final_val = True
    trainer.validate(model, ckpt_path='best', datamodule=dm)

    # Other operations
    print('Best ckpt: {}'.format(trainer.checkpoint_callback.best_model_path))
    if args.ckpt != 'debug' and args.save_ckpt:
        checkpoint_standardize(cpDir)
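The get_checkpoint_callback helper is project-specific and not shown here. A plausible thin wrapper around Lightning's ModelCheckpoint that matches the call sites above (assuming pytorch_lightning >= 1.3; the signature and defaults are guesses, not the original code):

from pytorch_lightning.callbacks import ModelCheckpoint


def get_checkpoint_callback(dirpath, monitor=None, mode='min', filename=None,
                            save_last=True, save_top_k=1,
                            every_n_train_steps=None):
    # Thin wrapper so call sites only pass the options they care about.
    return ModelCheckpoint(dirpath=dirpath,
                           monitor=monitor,
                           mode=mode,
                           filename=filename,
                           save_last=save_last,
                           save_top_k=save_top_k,
                           every_n_train_steps=every_n_train_steps)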
Example No. 29
def main():
    start_time = time()
    last_step = get_last_ckpt_step()
    assert last_step >= 0
    my_log(f'Checkpoint found: {last_step}\n')
    print_args()

    net_init, net_apply, net_init_cache, net_apply_fast = get_net()

    params = load_ckpt(last_step)
    in_shape = (args.batch_size, args.L, args.L, 1)
    _, cache_init = net_init_cache(params, jnp.zeros(in_shape), (-1, -1))

    # sample_fun = get_sample_fun(net_apply, None)
    sample_fun = get_sample_fun(net_apply_fast, cache_init)
    log_q_fun = get_log_q_fun(net_apply)

    def sample_energy_fun(rng):
        spins = sample_fun(args.batch_size, params, rng)
        log_q = log_q_fun(params, spins)
        energy = energy_fun(spins)
        return spins, log_q, energy

    @jit
    def update(spins_old, log_q_old, energy_old, step, energy_mean,
               energy_var_sum, rng):
        rng, rng_sample = jrand.split(rng)
        spins, log_q, energy = sample_energy_fun(rng_sample)
        mag = spins.mean(axis=(1, 2, 3))

        step += 1
        energy_per_spin = energy / args.L**2
        energy_mean, energy_var_sum = welford_update(energy_per_spin.mean(),
                                                     step, energy_mean,
                                                     energy_var_sum)

        return (spins, log_q, energy, mag, step, energy_mean, energy_var_sum,
                rng)

    rng, rng_init = jrand.split(jrand.PRNGKey(args.seed))
    spins, log_q, energy = sample_energy_fun(rng_init)

    step = 0
    energy_mean = 0
    energy_var_sum = 0

    data_filename = args.log_filename.replace('.log', '.hdf5')
    writer_proto = [
        # Uncomment to save all the sampled spins
        # ('spins', bool, (args.L, args.L)),
        ('log_q', np.float32, None),
        ('energy', np.int32, None),
        ('mag', np.float32, None),
    ]
    ensure_dir(data_filename)
    with ChunkedDataWriter(data_filename, writer_proto,
                           args.save_step * args.batch_size) as writer:
        my_log('Sampling...')
        while step < args.max_step:
            (spins, log_q, energy, mag, step, energy_mean, energy_var_sum,
             rng) = update(spins, log_q, energy, step, energy_mean,
                           energy_var_sum, rng)
            # Uncomment to save all the sampled spins
            # writer.write_batch(spins[:, :, :, 0] > 0, log_q, energy, mag)
            writer.write_batch(log_q, energy, mag)

            if args.print_step and step % args.print_step == 0:
                energy_std = jnp.sqrt(energy_var_sum / step)
                my_log(', '.join([
                    f'step = {step}',
                    f'E = {energy_mean:.8g}',
                    f'E_std = {energy_std:.8g}',
                    f'time = {time() - start_time:.3f}',
                ]))
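The welford_update helper is assumed to implement Welford's online algorithm for a running mean and sum of squared deviations, consistent with energy_std = sqrt(energy_var_sum / step) above. A minimal sketch (the original implementation may differ):

def welford_update(x, step, mean, var_sum):
    # `step` is the number of samples including `x`; `mean` is the running
    # mean and `var_sum` the running sum of squared deviations, so that
    # var_sum / step estimates the variance.
    delta = x - mean
    mean = mean + delta / step
    var_sum = var_sum + delta * (x - mean)
    return mean, var_sum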
Example No. 30
## Data
trainset, testset, num_classes = L.load_dataset(params['data'],
                                                data_dir=params['data_dir'])
X_train, y_train = F.get_samples(trainset, args.trainsamples)
X_test, y_test = F.get_samples(testset, args.testsamples)
if args.translatetrain:
    X_train, y_train = F.translate(X_train, y_train, stride=7)
if args.translatetest:
    X_test, y_test = F.translate(X_test, y_test, stride=7)
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

## Architecture
net = L.load_architecture(params['data'], params['arch'])
net = utils.load_ckpt(args.model_dir, 'model', net)
net = net.to(device)

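# utils.load_ckpt(model_dir, name, net) above is project-specific and not shown.
# A minimal sketch under the assumption that model_dir holds a 'checkpoints/<name>.pt'
# file containing a plain state_dict (the names and layout are guesses):
def _load_ckpt_sketch(model_dir, name, net):
    import os
    import torch
    ckpt_path = os.path.join(model_dir, 'checkpoints', f'{name}.pt')
    net.load_state_dict(torch.load(ckpt_path, map_location='cpu'))
    return net
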
## Forward
with torch.no_grad():
    print('train')
    Z_train = net.batch_forward(X_train,
                                batch_size=args.batch_size,
                                loss=args.loss,
                                device=device)
    X_train, y_train, Z_train = F.to_cpu(X_train, y_train, Z_train)
    utils.save_loss(eval_dir, 'train', net.get_loss())

    print('test')
    Z_test = net.batch_forward(X_test,
                               batch_size=args.batch_size,