def show_video():
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    video = cv2.VideoCapture(0)
    recognizer = cv2.face.LBPHFaceRecognizer_create()
    recognizer.read(cfg.MODEL_PATH)
    while True:
        ret, img = video.read()
        if not ret:
            break
        # Mirror the frame and detect faces on the grayscale version
        img = cv2.flip(img, 1)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5,
                                              minSize=(100, 100),
                                              flags=cv2.CASCADE_SCALE_IMAGE)
        for (x, y, w, h) in faces:
            cv2.rectangle(img, (x - 2, y - 2), (x + w + 2, y + h + 5), (255, 255, 0), 2)
            predict_id, percent = predict(recognizer, gray[y:y + h, x:x + w])
            name = get_name_with_uid(predict_id)
            cv2.putText(img, name, (x, y + h), 0, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
        cv2.imshow('video', img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    # Release the camera and close the preview window
    video.release()
    cv2.destroyAllWindows()
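# The predict() helper used above is not shown in this snippet. A minimal
# sketch, assuming it wraps cv2's LBPH API: recognizer.predict() returns a
# (label, confidence) pair, where lower confidence means a closer match.
# The resize dimensions and the confidence-to-percentage mapping here are
# illustrative assumptions, not the original implementation.
import cv2

def predict(recognizer, face_gray):
    face_gray = cv2.resize(face_gray, (100, 100))
    label, confidence = recognizer.predict(face_gray)
    percent = max(0.0, 100.0 - confidence)  # crude similarity score
    return label, percent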
def get_predictions(data, model):
    if model is not None:
        predictions = [predict(image, model) for image in data]
    else:
        # Fall back to a constant prediction when no model is available
        predictions = [[2, 1] for _ in range(len(data))]
        print('constant_predictions', predictions)
    return predictions
def compute_hmean(model):
    model.eval()
    predict(model, 0)
    evalParams = default_evaluation_params()
    gtFilePath = 'gt.zip'
    submFilePath = 'data/result/epoch_0_gt'
    resDict = {
        'calculated': True,
        'Message': '',
        'method': '{}',
        'per_sample': '{}'
    }
    evalData = evaluate_method(gtFilePath, submFilePath, evalParams)
    resDict.update(evalData)
    ret = resDict['method']
    print(ret)
    return ret['hmean']
def main(picture):
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    img = cv2.imread(picture)
    pic_h, pic_w, pic_d = img.shape  # cv2 images are (height, width, depth)
    # img = scale_image(img, 1.5)
    # cv2.imshow('source', img)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5,
                                          minSize=(10, 10),
                                          flags=cv2.CASCADE_SCALE_IMAGE)
    face = np.array([], dtype='uint8')
    print(len(faces))
    if len(faces) > 0:
        print('Reading model...')
        recognizer = cv2.face.LBPHFaceRecognizer_create()
        recognizer.read(cfg.MODEL_PATH)
        for (x, y, w, h) in faces:
            cv2.rectangle(img, (x - 2, y - 2), (x + w + 2, y + h + 5), (255, 0, 0), 5)
            predict_id, percent = predict(recognizer, gray[y:y + h, x:x + w])
            name = get_name_with_uid(predict_id)
            cv2.putText(img, name, (x, y + h), 0, 0.5, (255, 255, 0), 1, cv2.LINE_AA)
    cv2.imwrite('image.jpg', img)
def main():
    hmean = .0
    is_best = False
    warnings.simplefilter('ignore', np.RankWarning)

    # Prepare the dataset
    print('EAST <==> Prepare <==> DataLoader <==> Begin')
    # train_root_path = os.path.abspath(os.path.join('./dataset/', 'train'))
    train_root_path = cfg.dataroot
    train_img = os.path.join(train_root_path, 'img')
    train_gt = os.path.join(train_root_path, 'gt')
    trainset = custom_dset(train_img, train_gt)
    train_loader = DataLoader(trainset,
                              batch_size=cfg.train_batch_size_per_gpu * cfg.gpu,
                              shuffle=True, collate_fn=collate_fn,
                              num_workers=cfg.num_workers)
    print('EAST <==> Prepare <==> Batch_size:{} <==> Begin'.format(
        cfg.train_batch_size_per_gpu * cfg.gpu))
    print('EAST <==> Prepare <==> DataLoader <==> Done')

    # Test the dataloader
    """
    for i in range(100000):
        for j, (a, b, c, d) in enumerate(train_loader):
            print(i, j, '/', len(train_loader))
    """

    # Model
    print('EAST <==> Prepare <==> Network <==> Begin')
    model = East()
    model = nn.DataParallel(model, device_ids=cfg.gpu_ids)
    model = model.cuda()
    init_weights(model, init_type=cfg.init_type)
    cudnn.benchmark = True
    criterion = LossFunc()
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=10000, gamma=0.94)

    # Init or resume from a checkpoint
    if cfg.resume and os.path.isfile(cfg.checkpoint):
        weightpath = os.path.abspath(cfg.checkpoint)
        print("EAST <==> Prepare <==> Loading checkpoint '{}' <==> Begin".format(weightpath))
        checkpoint = torch.load(weightpath)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("EAST <==> Prepare <==> Loading checkpoint '{}' <==> Done".format(weightpath))
    else:
        start_epoch = 0
    print('EAST <==> Prepare <==> Network <==> Done')

    for epoch in range(start_epoch, cfg.max_epochs):
        train(train_loader, model, criterion, scheduler, optimizer, epoch)
        if epoch % cfg.eval_iteration == 0:
            # Write result files and images with boxes
            output_txt_dir_path = predict(model, criterion, epoch)
            # Zip the result files
            submit_path = MyZip(output_txt_dir_path, epoch)
            # Submit and compute the H-mean
            hmean_ = compute_hmean(submit_path)
            # Track the best H-mean seen so far
            is_best = hmean_ > hmean
            if is_best:
                hmean = hmean_
            state = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'is_best': is_best,
            }
            save_checkpoint(state, epoch)
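# save_checkpoint() is not shown in this snippet. A minimal sketch using
# torch.save; the 'checkpoint' directory and file-name layout are assumptions
# for illustration, not the original implementation.
import os
import torch

def save_checkpoint(state, epoch, checkpoint_dir='checkpoint'):
    os.makedirs(checkpoint_dir, exist_ok=True)
    torch.save(state, os.path.join(checkpoint_dir, 'model_epoch_{}.pth'.format(epoch)))
    if state.get('is_best'):
        # Keep a separate copy of the best model so far
        torch.save(state, os.path.join(checkpoint_dir, 'model_best.pth'))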
def main():
    set_seed(args.seed)
    torch.cuda.set_device(args.gpu)

    encoder = Encoder().cuda()
    encoder_group_0 = Encoder().cuda()
    encoder_group_1 = Encoder().cuda()
    dfc = DFC(cluster_number=args.k, hidden_dimension=64).cuda()
    dfc_group_0 = DFC(cluster_number=args.k, hidden_dimension=64).cuda()
    dfc_group_1 = DFC(cluster_number=args.k, hidden_dimension=64).cuda()
    critic = AdversarialNetwork(in_feature=args.k, hidden_size=32,
                                max_iter=args.iters, lr_mult=args.adv_mult).cuda()

    # Encoder pre-trained with self-reconstruction
    encoder.load_state_dict(torch.load("./save/encoder_pretrain.pth"))
    # Encoders and clustering models trained by DEC
    encoder_group_0.load_state_dict(torch.load("./save/encoder_mnist.pth"))
    encoder_group_1.load_state_dict(torch.load("./save/encoder_usps.pth"))
    dfc_group_0.load_state_dict(torch.load("./save/dec_mnist.pth"))
    dfc_group_1.load_state_dict(torch.load("./save/dec_usps.pth"))

    # Load clustering centroids given by k-means
    centers = np.loadtxt("./save/centers.txt")
    cluster_centers = torch.tensor(centers, dtype=torch.float, requires_grad=True).cuda()
    with torch.no_grad():
        print("loading clustering centers...")
        dfc.state_dict()['assignment.cluster_centers'].copy_(cluster_centers)

    optimizer = torch.optim.Adam(dfc.get_parameters() + encoder.get_parameters()
                                 + critic.get_parameters(),
                                 lr=args.lr, weight_decay=5e-4)
    criterion_c = nn.KLDivLoss(reduction="sum")
    criterion_p = nn.MSELoss(reduction="sum")
    C_LOSS = AverageMeter()
    F_LOSS = AverageMeter()
    P_LOSS = AverageMeter()

    encoder_group_0.eval(), encoder_group_1.eval()
    dfc_group_0.eval(), dfc_group_1.eval()

    data_loader = mnist_usps(args)
    len_image_0 = len(data_loader[0])
    len_image_1 = len(data_loader[1])

    for step in range(args.iters):
        encoder.train()
        dfc.train()
        # Restart the iterators once a loader is exhausted
        if step % len_image_0 == 0:
            iter_image_0 = iter(data_loader[0])
        if step % len_image_1 == 0:
            iter_image_1 = iter(data_loader[1])
        image_0, _ = next(iter_image_0)
        image_1, _ = next(iter_image_1)
        image_0, image_1 = image_0.cuda(), image_1.cuda()
        image = torch.cat((image_0, image_1), dim=0)

        predict_0 = dfc_group_0(encoder_group_0(image_0)[0])
        predict_1 = dfc_group_1(encoder_group_1(image_1)[0])

        z, _, _ = encoder(image)
        output = dfc(z)
        output_0, output_1 = output[0:args.bs, :], output[args.bs:args.bs * 2, :]
        target_0 = target_distribution(output_0).detach()
        target_1 = target_distribution(output_1).detach()

        clustering_loss = 0.5 * criterion_c(output_0.log(), target_0) \
            + 0.5 * criterion_c(output_1.log(), target_1)
        fair_loss = adv_loss(output, critic)
        partition_loss = 0.5 * criterion_p(aff(output_0), aff(predict_0).detach()) \
            + 0.5 * criterion_p(aff(output_1), aff(predict_1).detach())
        total_loss = clustering_loss + args.coeff_fair * fair_loss \
            + args.coeff_par * partition_loss

        optimizer = inv_lr_scheduler(optimizer, args.lr, step, args.iters)
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        C_LOSS.update(clustering_loss)
        F_LOSS.update(fair_loss)
        P_LOSS.update(partition_loss)

        if step % args.test_interval == args.test_interval - 1 or step == 0:
            predicted, labels = predict(data_loader, encoder, dfc)
            predicted, labels = predicted.cpu().numpy(), labels.numpy()
            _, accuracy = cluster_accuracy(predicted, labels, 10)
            nmi = normalized_mutual_info_score(labels, predicted, average_method="arithmetic")
            bal, en_0, en_1 = balance(predicted, 60000)
            print("Step:[{:03d}/{:03d}] "
                  "Acc:{:2.3f};"
                  "NMI:{:1.3f};"
                  "Bal:{:1.3f};"
                  "En:{:1.3f}/{:1.3f};"
                  "C.loss:{C_Loss.avg:3.2f};"
                  "F.loss:{F_Loss.avg:3.2f};"
                  "P.loss:{P_Loss.avg:3.2f};".format(step + 1, args.iters, accuracy, nmi, bal,
                                                     en_0, en_1, C_Loss=C_LOSS,
                                                     F_Loss=F_LOSS, P_Loss=P_LOSS))
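# AverageMeter is not defined in these snippets. A minimal sketch of the
# utility from the standard PyTorch examples, which the .update()/.avg/.val
# usage above appears to follow:
class AverageMeter:
    """Tracks the latest value and the running average of a metric."""

    def __init__(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count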
import os

import pandas as pd

from eval import predict

_ROOT = "../MacauAI_TrainingSet_1/"
_CSV = os.path.join(_ROOT, "training.csv")

data = pd.read_csv(_CSV, sep=",")
y = predict(data, _ROOT, "./upload")

# Count how many predictions match the labelled class
n = 0
c = 0
for i, row in data.iterrows():
    print(y[i], row["class"])
    if y[i] == row["class"]:
        c += 1
    n += 1
print(n, c, c / n)
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))

    print(model)
    print('parameters-----')
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name, param.data.size())

    if args.predict:
        print('Prediction mode')
        print('#Comment:', args.pred_cmnt)
        print('#Context:', args.pred_ctx)
        predict(args.pred_cmnt, args.pred_ctx, w2i, model, args.max_ctx_length)
    elif args.test:
        print('Test mode')
        eval(dataset, test_df, w2i, model, args)
    elif args.case_study:
        start_time = time.time()
        case_study(dataset, test_df, w2i, model, args)
    else:
        print('Train mode')
        start_time = time.time()
        train(args, model, dataset, train_df, val_df, optimizer, w2i,
              n_epoch=args.epoch, start_epoch=args.start_epoch,
              batch_size=args.batch_size)
        print("Training duration: %s seconds" % (time.time() - start_time))
    print('finish')
def train(args, dataloader_list, encoder, device='cpu', centers=None, save_name='DEC'):
    """Trains DEC and automatically saves the model when training finishes.

    Args:
        args: Namespace object which contains config set from argument parser
              { lr, seed, iters, log_dir, test_interval, adv_multiplier, dfc_hidden_dim }
        dataloader_list (list): may consist of one dataloader or several
        encoder: Encoder to use
        device: Device configuration
        centers: Initial cluster centers, if available
        save_name: Prefix for save files

    Returns:
        The trained clustering model (a DFC instance used in DEC mode)
    """
    set_seed(args.seed)
    if args.half_tensor:
        torch.set_default_tensor_type('torch.HalfTensor')

    dec = DFC(cluster_number=args.cluster_number,
              hidden_dimension=args.dfc_hidden_dim).to(device)
    wandb.watch(dec)

    if centers is not None:
        cluster_centers = centers.clone().detach().requires_grad_(True).to(device)
        with torch.no_grad():
            print("loading clustering centers...")
            dec.state_dict()['assignment.cluster_centers'].copy_(cluster_centers)

    # Depending on the encoder the parameter groups differ, hence this `if`
    encoder_param = encoder.get_parameters() if args.encoder_type == 'vae' else [
        {"params": get_update_param(encoder), "lr_mult": 1}]
    optimizer = torch.optim.Adam(dec.get_parameters() + encoder_param, lr=args.dec_lr)

    # criterion_c = nn.KLDivLoss(reduction="sum")
    # Following the DEC code more closely:
    criterion_c = nn.KLDivLoss(size_average=False)
    C_LOSS = AverageMeter()

    print("Start training")
    assert 0 < len(dataloader_list) < 3
    concat_dataset = torch.utils.data.ConcatDataset([x.dataset for x in dataloader_list])
    training_dataloader = torch.utils.data.DataLoader(
        dataset=concat_dataset,
        batch_size=args.dec_batch_size,
        shuffle=True,
        num_workers=4)

    for step in range(args.dec_iters):
        encoder.train()
        dec.train()
        # Restart the iterator once the loader is exhausted
        if step % len(training_dataloader) == 0:
            iterator = iter(training_dataloader)
        image, _ = next(iterator)
        image = image.to(device)

        if args.encoder_type == 'vae':
            z, _, _ = encoder(image)
        elif args.encoder_type == 'resnet50':
            z = encoder(image)
        else:
            raise Exception('Wrong encoder type, how did you get this far in running the code?')

        output = dec(z)
        target = target_distribution(output).detach()
        clustering_loss = criterion_c(output.log(), target) / output.shape[0]

        optimizer.zero_grad()
        clustering_loss.backward()
        optimizer.step()

        C_LOSS.update(clustering_loss)
        wandb.log({f"{save_name} Train C Loss Avg": C_LOSS.avg, f"{save_name} step": step})
        wandb.log({f"{save_name} Train C Loss Cur": C_LOSS.val, f"{save_name} step": step})

        if step % args.test_interval == args.test_interval - 1 or step == 0:
            predicted, labels = predict(dataloader_list, encoder, dec,
                                        device=device, encoder_type=args.encoder_type)
            predicted, labels = predicted.cpu().numpy(), labels.numpy()
            _, accuracy = cluster_accuracy(predicted, labels, args.cluster_number)
            nmi = normalized_mutual_info_score(labels, predicted, average_method="arithmetic")
            bal, en_0, en_1 = balance(predicted, len(dataloader_list[0]),
                                      k=args.cluster_number)
            wandb.log({f"{save_name} Train Accuracy": accuracy,
                       f"{save_name} Train NMI": nmi,
                       f"{save_name} Train Bal": bal,
                       f"{save_name} Train Entropy 0": en_0,
                       f"{save_name} Train Entropy 1": en_1,
                       f"{save_name} step": step})
            print("Step:[{:03d}/{:03d}] "
                  "Acc:{:2.3f};"
                  "NMI:{:1.3f};"
                  "Bal:{:1.3f};"
                  "En:{:1.3f}/{:1.3f};"
                  "Clustering.loss:{C_Loss.avg:3.2f};".format(step + 1, args.dec_iters,
                                                              accuracy, nmi, bal, en_0,
                                                              en_1, C_Loss=C_LOSS))
            # Log a t-SNE visualisation
            if args.encoder_type == "vae":
                tsne_img = tsne_visualization(dataloader_list, encoder, args.cluster_number,
                                              encoder_type=args.encoder_type, device=device)
                if tsne_img is not None:
                    wandb.log({f"{save_name} TSNE": plt, f"{save_name} step": step})

    torch.save(dec.state_dict(), f'{args.log_dir}DFC_{save_name}.pth')
    return dec
def upload_file():
    if request.method == 'POST':
        print("PROCESSING CLASSIFICATION REQUEST")
        f = request.files['wav']
        f.save(FNAME)
        return str(predict(FNAME))
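# Usage sketch for the endpoint above, using the `requests` library. The URL
# and route are assumptions (the route decorator is not shown in the snippet);
# adjust them to wherever the Flask app actually serves upload_file().
import requests

with open('sample.wav', 'rb') as wav:
    resp = requests.post('http://localhost:5000/upload', files={'wav': wav})
print(resp.text)  # classifier output returned by predict(FNAME)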
def train(args, dataloader_list, encoder, encoder_group_0=None, encoder_group_1=None,
          dfc_group_0=None, dfc_group_1=None, device='cpu', centers=None,
          get_loss_trade_off=lambda step: (10, 10, 10), save_name='DFC'):
    """Trains DFC and optionally the critic, automatically saving when training finishes.

    Args:
        args: Namespace object which contains config set from argument parser
              { lr, seed, iters, log_dir, test_interval, adv_multiplier, dfc_hidden_dim }
        dataloader_list (list): may consist of one dataloader or several
        encoder: Encoder to use
        encoder_group_0: Optional pre-trained golden standard model
        encoder_group_1: Optional pre-trained golden standard model
        dfc_group_0: Optional pre-trained clustering model paired with encoder_group_0
        dfc_group_1: Optional pre-trained clustering model paired with encoder_group_1
        device: Device configuration
        centers: Initial cluster centers, if available
        get_loss_trade_off: Proportional importance of individual loss functions
        save_name: Prefix for save files

    Returns:
        DFC: A trained DFC model
    """
    set_seed(args.seed)
    if args.half_tensor:
        torch.set_default_tensor_type('torch.HalfTensor')

    dfc = DFC(cluster_number=args.cluster_number,
              hidden_dimension=args.dfc_hidden_dim).to(device)
    wandb.watch(dfc)
    critic = AdversarialNetwork(in_feature=args.cluster_number, hidden_size=32,
                                max_iter=args.iters, lr_mult=args.adv_multiplier).to(device)
    wandb.watch(critic)

    if centers is not None:
        cluster_centers = centers.clone().detach().requires_grad_(True).to(device)
        with torch.no_grad():
            print("loading clustering centers...")
            dfc.state_dict()['assignment.cluster_centers'].copy_(cluster_centers)

    encoder_param = encoder.get_parameters() if args.encoder_type == 'vae' else [
        {"params": encoder.parameters(), "lr_mult": 1}]
    optimizer = torch.optim.Adam(dfc.get_parameters() + encoder_param + critic.get_parameters(),
                                 lr=args.dec_lr, weight_decay=5e-4)
    criterion_c = nn.KLDivLoss(reduction="sum")
    criterion_p = nn.MSELoss(reduction="sum")
    C_LOSS = AverageMeter()
    F_LOSS = AverageMeter()
    P_LOSS = AverageMeter()

    partition_loss_enabled = True
    if not encoder_group_0 or not encoder_group_1 or not dfc_group_0 or not dfc_group_1:
        print("Missing Golden Standard models, switching to DEC mode instead of DFC.")
        partition_loss_enabled = False
    if partition_loss_enabled:
        encoder_group_0.eval(), encoder_group_1.eval()
        dfc_group_0.eval(), dfc_group_1.eval()

    print("Start training")
    assert 0 < len(dataloader_list) < 3
    len_image_0 = len(dataloader_list[0])
    len_image_1 = len(dataloader_list[1]) if len(dataloader_list) == 2 else None

    for step in range(args.iters):
        encoder.train()
        dfc.train()
        # Restart the iterators once a loader is exhausted
        if step % len_image_0 == 0:
            iter_image_0 = iter(dataloader_list[0])
        if len_image_1 and step % len_image_1 == 0:
            iter_image_1 = iter(dataloader_list[1])

        image_0, _ = next(iter_image_0)
        image_0 = image_0.to(device)
        if len_image_1 is not None:
            image_1, _ = next(iter_image_1)
            image_1 = image_1.to(device)
            image = torch.cat((image_0, image_1), dim=0)
        else:
            image_1 = None
            image = torch.cat((image_0,), dim=0)

        if args.encoder_type == 'vae':
            z, _, _ = encoder(image)
        elif args.encoder_type == 'resnet50':
            z = encoder(image)
        else:
            raise Exception('Wrong encoder type, how did you get this far in running the code?')

        output = dfc(z)

        # Soft assignments from the pre-trained golden standard models are
        # only needed for the partition loss; skip them in DEC mode.
        if partition_loss_enabled:
            features_enc_0 = encoder_group_0(image_0)[0] if args.encoder_type == 'vae' \
                else encoder_group_0(image_0)
            predict_0 = dfc_group_0(features_enc_0)
            if image_1 is not None:
                features_enc_1 = encoder_group_1(image_1)[0] if args.encoder_type == 'vae' \
                    else encoder_group_1(image_1)
                predict_1 = dfc_group_1(features_enc_1)
            else:
                predict_1 = None
        else:
            predict_0, predict_1 = None, None

        output_0 = output[0:args.bs, :]
        output_1 = output[args.bs:args.bs * 2, :] if image_1 is not None else None
        target_0 = target_distribution(output_0).detach()
        target_1 = target_distribution(output_1).detach() if output_1 is not None else None

        # Equation (5) in the paper:
        # output_0 and output_1 are the probability distributions P of samples
        # being assigned to each of the k classes; target_0 and target_1 are
        # the auxiliary distributions Q calculated from P, Equation (4) in the paper.
        if output_1 is not None:
            clustering_loss = 0.5 * criterion_c(output_0.log(), target_0) \
                + 0.5 * criterion_c(output_1.log(), target_1)
        else:
            clustering_loss = criterion_c(output_0.log(), target_0)

        # Equation (2) in the paper: output = D(A(F(X)));
        # the critic models the distribution of the categorical sensitive
        # subgroup variable G.
        if len(dataloader_list) > 1:
            fair_loss, critic_acc = adv_loss(output, critic, device=device)
        else:
            fair_loss, critic_acc = 0, 0

        if partition_loss_enabled:
            # Equation (3) in the paper:
            # predict_0 and predict_1 are the soft cluster assignments of the
            # pre-trained group-specific models; output_0 and output_1 come
            # from the DFC being trained. The loss is high if the outputs and
            # predictions (and thus the cluster structures) differ.
            if predict_1 is not None:
                partition_loss = 0.5 * criterion_p(aff(output_0), aff(predict_0).detach()) \
                    + 0.5 * criterion_p(aff(output_1), aff(predict_1).detach())
            else:
                partition_loss = criterion_p(aff(output_0), aff(predict_0).detach())
        else:
            partition_loss = 0

        loss_trade_off = get_loss_trade_off(step)
        if args.encoder_type == 'resnet50' and args.dataset == 'office_31':  # alpha_s
            loss_trade_off = list(loss_trade_off)
            loss_trade_off[1] = ((512 / 128) ** 2) * (31 / 10)

        total_loss = loss_trade_off[0] * fair_loss + loss_trade_off[1] * partition_loss \
            + loss_trade_off[2] * clustering_loss

        optimizer = inv_lr_scheduler(optimizer, args.lr, step, args.iters)
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        C_LOSS.update(clustering_loss)
        F_LOSS.update(fair_loss)
        P_LOSS.update(partition_loss)

        wandb.log({f"{save_name} Train C Loss Avg": C_LOSS.avg,
                   f"{save_name} Train F Loss Avg": F_LOSS.avg,
                   f"{save_name} Train P Loss Avg": P_LOSS.avg,
                   f"{save_name} step": step,
                   f"{save_name} Critic ACC": critic_acc})
        wandb.log({f"{save_name} Train C Loss Cur": C_LOSS.val,
                   f"{save_name} Train F Loss Cur": F_LOSS.val,
                   f"{save_name} Train P Loss Cur": P_LOSS.val,
                   f"{save_name} step": step})

        if step % args.test_interval == args.test_interval - 1 or step == 0:
            predicted, labels = predict(dataloader_list, encoder, dfc,
                                        device=device, encoder_type=args.encoder_type)
            predicted, labels = predicted.cpu().numpy(), labels.numpy()
            _, accuracy = cluster_accuracy(predicted, labels, args.cluster_number)
            nmi = normalized_mutual_info_score(labels, predicted, average_method="arithmetic")
            bal, en_0, en_1 = balance(predicted, len_image_0, k=args.cluster_number)

            wandb.log({f"{save_name} Train Accuracy": accuracy,
                       f"{save_name} Train NMI": nmi,
                       f"{save_name} Train Bal": bal,
                       f"{save_name} Train Entropy 0": en_0,
                       f"{save_name} Train Entropy 1": en_1,
                       f"{save_name} step": step})
            print("Step:[{:03d}/{:03d}] "
                  "Acc:{:2.3f};"
                  "NMI:{:1.3f};"
                  "Bal:{:1.3f};"
                  "En:{:1.3f}/{:1.3f};"
                  "Clustering.loss:{C_Loss.avg:3.2f};"
                  "Fairness.loss:{F_Loss.avg:3.2f};"
                  "Partition.loss:{P_Loss.avg:3.2f};".format(step + 1, args.iters, accuracy,
                                                             nmi, bal, en_0, en_1,
                                                             C_Loss=C_LOSS, F_Loss=F_LOSS,
                                                             P_Loss=P_LOSS))
            # Log a t-SNE visualisation
            if args.encoder_type == "vae":
                tsne_img = tsne_visualization(dataloader_list, encoder, args.cluster_number,
                                              encoder_type=args.encoder_type, device=device)
                if tsne_img is not None:
                    wandb.log({f"{save_name} TSNE": plt, f"{save_name} step": step})

    torch.save(dfc.state_dict(), f'{args.log_dir}DFC_{save_name}.pth')
    if len(dataloader_list) > 1:
        torch.save(critic.state_dict(), f'{args.log_dir}CRITIC_{save_name}.pth')
    return dfc
import sys

from library_viterby import preprocess
from eval import read_training_artifacts, predict  # predict assumed to live in eval too

if __name__ == "__main__":
    try:
        data_file = sys.argv[1]
    except IndexError:
        print('using default file')
        data_file = "test_generate.txt"

    word_file_path = 'artifacts/source/temp_generate.txt'
    words = []
    with open(word_file_path, 'w') as output_file:
        with open(data_file, 'r') as input_file:
            for line in input_file:
                for word in eval(line):
                    words.append(word)
                    output_file.write(f'{word}\n')

    A, B, vocab, tag_counts, states = read_training_artifacts()
    _, processed_text_corpus = preprocess(vocab, word_file_path)
    predictions = predict(states, A, B, vocab, tag_counts, processed_text_corpus)
    print(words)
    print(predictions)