def init_log_path(input_img_path, params):
    """
    Create the nested result directory for one input image and build a
    parameter-tagged base name for the files saved there.

    The directory tree is
    ``<cwd>/Predict_Result/<image file name>/Filter_<f>/threshold_<t>/Removepixel_<r>``;
    every level is created through ``mkdir``.

    :param input_img_path: path of the input image; only its last component is used
    :param params: mapping providing 'filter_size', 'threshold' and 'remove_pixel'
    :return: ``(log_path, origin_img_name)`` - the deepest directory, and the image
        file name minus its last four characters followed by the three parameter tags
    """
    image_file = os.path.split(input_img_path)[1]
    out_dir = os.path.join(os.getcwd(), 'Predict_Result')
    mkdir(out_dir)
    out_dir = os.path.join(out_dir, image_file)
    mkdir(out_dir)

    # Drop the last four characters (e.g. a ".png"-style suffix) before tagging.
    tagged_name = image_file[:-4]
    tag_sources = (
        ("Filter_", 'filter_size'),
        ("threshold_", 'threshold'),
        ("Removepixel_", 'remove_pixel'),
    )
    for prefix, key in tag_sources:
        tag = prefix + str(params[key])
        out_dir = os.path.join(out_dir, tag)
        mkdir(out_dir)
        tagged_name += tag
    return out_dir, tagged_name
def Visualize_Confident_Prediction_WithStructure(save_path, map_name, Final_Predict_file, factor, real_loc_ref, output_str):
    """
    Write a PDB file of confident predictions plus a PyMOL script that colors them.

    Each line of *Final_Predict_file* is whitespace-separated: a key, the
    predicted label, then one probability per class. Only entries whose
    predicted-class probability is at least 0.9 and whose key is present in
    *real_loc_ref* are written.

    :param save_path: parent directory; output goes to ``save_path/output_str``
    :param map_name: prefix of the generated file names
    :param Final_Predict_file: path of the prediction text file
    :param factor: unused by this function
    :param real_loc_ref: mapping from key to a coordinate sequence (x, y, z)
    :param output_str: sub-directory name and file-name infix
    :return: None; writes ``<map_name><output_str>_predC.pdb`` and ``..._predC.pml``
    """
    # key -> (predicted label, probability of that label)
    predictions = {}
    with open(Final_Predict_file, 'r') as pred_in:
        for raw in pred_in:
            fields = raw.strip().split()
            voxel_key = fields[0]
            label = int(fields[1])
            # the prob of the predicted class by our Model
            predictions[voxel_key] = (label, float(fields[2 + label]))

    out_dir = os.path.join(save_path, output_str)
    mkdir(out_dir)
    obj_name = map_name + output_str + "_predC"
    pdb_path = os.path.join(out_dir, obj_name + ".pdb")

    # NOTE(review): the atom/residue number is never advanced, so every record
    # is written with number 1 - confirm whether that is intended.
    atom_no = 1
    chain_of = Build_Chain_ID()
    with open(pdb_path, 'w') as pdb_out:
        for voxel_key, (label, prob) in predictions.items():
            if prob < 0.9:
                continue
            chain = chain_of[label]
            if voxel_key not in real_loc_ref:
                # because it's background, ignore predictions
                continue
            xyz = real_loc_ref[voxel_key]
            record = "ATOM%7d %3s %3s%2s%4d " % (atom_no, "CA ", "ALA", " " + chain, atom_no)
            record += "%8.3f%8.3f%8.3f%6.2f\n" % (xyz[0], xyz[1], xyz[2], 1)
            pdb_out.write(record)

    # color it based on our definition
    # using pymol -u *.pml to open it
    script_path = os.path.join(out_dir, obj_name + ".pml")
    with open(script_path, 'w') as script:
        script_lines = [
            "load " + obj_name + ".pdb\n",
            "show spheres, " + obj_name + "\n",
            "set sphere_scale, 0.5\n",
        ]
        for color, chain_id in (("green", "A"), ("yellow", "B"), ("red", "C"), ("cyan", "D")):
            script_lines.append("color " + color + ", chain " + chain_id + " and " + obj_name + "\n")
        for selection, chain_id in (("coil", "A"), ("beta", "B"), ("alpha", "C"), ("DNA_RNA", "D")):
            script_lines.append("select " + selection + ", chain " + chain_id + " and " + obj_name + "\n")
        script_lines.append("bg_color 0\n")
        script.writelines(script_lines)
def Visualize_Binary_Confident_Prediction(save_path, map_name, Final_Predict_file, factor, output_str):
    """
    Write a PDB file of confident binary predictions plus a PyMOL script.

    Each line of *Final_Predict_file* is whitespace-separated: a key of the
    form ``"x,y,z"``, the predicted label, then the per-class probabilities in
    columns 2..5. Labels 0-2 are merged into binary class 0 and any other
    label becomes binary class 1 (the generated script names the two chains
    "protein" and "DNA_RNA"). The confidence of class 0 is the sum of the
    three columns 2, 3 and 4; the confidence of class 1 is column 5. Entries
    with a confidence below 0.8 are skipped.

    Fixes relative to the previous version:

    * the class-0 confidence previously added column 2 three times instead of
      summing columns 2, 3 and 4;
    * the PyMOL script previously loaded ``..._pred.pdb`` although the
      structure is written to ``..._predC.pdb``; it now loads the file that
      is actually produced;
    * blank lines in the prediction file are skipped instead of raising
      ``IndexError``.

    :param save_path: parent directory; output goes to ``save_path/output_str``
    :param map_name: prefix of the generated file names
    :param Final_Predict_file: path of the prediction text file
    :param factor: multiplier applied to each key coordinate
    :param output_str: sub-directory name and file-name infix
    :return: None; writes ``<map_name><output_str>_predC.pdb`` and
        ``<map_name><output_str>_pred.pml``
    """
    Final_Predict_Dict = {}
    Final_Prob_Dict = {}
    with open(Final_Predict_file, 'r') as file:
        for line in file:
            split_result = line.split()
            if not split_result:
                continue  # tolerate blank / trailing empty lines
            key = split_result[0]
            tmp_label = 0 if int(split_result[1]) <= 2 else 1
            if tmp_label == 0:
                # total probability mass of the three merged classes
                tmp_prob = sum(float(split_result[2 + k]) for k in range(3))
            else:
                tmp_prob = float(split_result[5])
            Final_Prob_Dict[key] = tmp_prob
            Final_Predict_Dict[key] = tmp_label

    save_path = os.path.join(save_path, output_str)
    mkdir(save_path)
    pdb_obj_name = map_name + output_str + "_predC"
    tmp_visual_pred_path = os.path.join(save_path, pdb_obj_name + ".pdb")

    # NOTE(review): natm is never advanced, so every record carries number 1;
    # left unchanged because widening the fixed-width fields could shift columns.
    natm = 1
    chain_dict = Build_Chain_ID()
    Nstep = 11
    # constant offset added to every scaled coordinate (5 for Nstep == 11)
    back = int((float(Nstep) - 1) / 2)
    with open(tmp_visual_pred_path, 'w') as predfile:
        for key, tmp_label in Final_Predict_Dict.items():
            if Final_Prob_Dict[key] < 0.8:
                continue
            tmp_chain = chain_dict[tmp_label]
            coordinate = key.split(",")
            tmp_coordinate = [float(coordinate[axis]) * factor + back for axis in range(3)]
            line = "ATOM%7d %3s %3s%2s%4d " % (natm, "CA ", "ALA", " " + tmp_chain, natm)
            line += "%8.3f%8.3f%8.3f%6.2f\n" % (tmp_coordinate[0], tmp_coordinate[1], tmp_coordinate[2], 1)
            predfile.write(line)

    # color it based on our definition
    # using pymol -u *.pml to open it
    tmp_visual_script_path = os.path.join(save_path, map_name + output_str + "_pred.pml")
    with open(tmp_visual_script_path, 'w') as file:
        # The script must reference the PDB written above.
        # NOTE(review): assumes PyMOL names the loaded object after the file
        # name without its extension - confirm against the PyMOL `load` docs.
        file.write("load " + pdb_obj_name + ".pdb\n")
        current_obj_name = pdb_obj_name
        file.write("show spheres, " + current_obj_name + "\n")
        file.write("set sphere_scale, 0.5\n")
        file.write("color red, chain A and " + current_obj_name + "\n")
        file.write("color cyan, chain B and " + current_obj_name + "\n")
        file.write("select protein, chain A and " + current_obj_name + "\n")
        file.write("select DNA_RNA, chain B and " + current_obj_name + "\n")
        file.write("bg_color 0\n")
def Gen_Predictions(all_map_path, save_path):
    """
    Run the Haruspex prediction script on every file in *all_map_path*.

    For each directory entry an output directory named after the entry minus
    its last four characters is created under *save_path*, and
    ``<cwd>/source/hpx_unet_190116.py`` is launched with the network at
    ``<cwd>/network/hpx_190116``.

    The command is passed to ``subprocess.run`` as an argument list rather
    than a concatenated shell string, so paths containing spaces or shell
    metacharacters are handled correctly. A failed launch or a non-zero exit
    status is reported and processing continues with the next map.

    :param all_map_path: directory containing the input maps
    :param save_path: directory under which per-map output directories are created
    :return: None
    """
    # Local import keeps this block self-contained.
    import subprocess

    code_path = os.path.join(os.getcwd(), 'source', 'hpx_unet_190116.py')
    network_path = os.path.join(os.getcwd(), 'network', 'hpx_190116')
    for item in os.listdir(all_map_path):
        tmp_output_path = os.path.join(save_path, item[:-4])
        mkdir(tmp_output_path)
        tmp_map_path = os.path.join(all_map_path, item)
        run_cmd = [
            "python3", code_path,
            "-n", network_path,
            "-d", "map-predict", tmp_map_path,
            "-o", tmp_output_path,
        ]
        try:
            finished = subprocess.run(run_cmd)
        except OSError as err:
            # e.g. python3 is not on PATH; keep going like os.system did
            print("failed to launch prediction for %s: %s" % (item, err))
            continue
        if finished.returncode != 0:
            print("prediction for %s exited with status %d" % (item, finished.returncode))
def write_slurm_sh(id, command_line, outlog_path):
    """
    Write a one-task SLURM batch script into ``<cwd>/log``.

    :param id: job identifier; used in the script name ``slurm-job<id>.sh``
    :param command_line: command written as the last line of the script
    :param outlog_path: value of the ``#SBATCH -o`` directive
    :return: path of the generated batch script
    """
    log_dir = os.path.join(os.getcwd(), 'log')
    mkdir(log_dir)
    script_path = os.path.join(log_dir, 'slurm-job' + str(id) + '.sh')
    with open(script_path, 'w') as script:
        script.writelines([
            '#!/usr/bin/env bash\n',
            '\n',
            '#SBATCH -o ' + outlog_path + '\n',
            '#SBATCH -p kihara\n',
            '#SBATCH --cpus-per-task=1\n',
            '#SBATCH --ntasks=1\n',
            command_line + '\n',
        ])
    return script_path
def Prepare_Data(file_path):
    """
    Convert the decoys found in *file_path* into training arrays and save them.

    Entries whose name contains 'pdb' are treated as decoys and entries whose
    name contains 'txt' as label files; both listings are sorted and paired by
    position. The receptor count is taken from the first decoy only and reused
    for all the others. For every decoy four slices of the last axis of the
    ``reform_input`` result are stored, and the integer on the first line of
    the paired label file is stored once.

    :param file_path: dir saves all the prepared data
    :return: the ``Training_Data`` directory (inside *file_path*) that holds
        ``trainset.npy`` and ``aimset.npy``
    """
    # Directory that receives the processed numpy files.
    save_path = os.path.join(file_path, 'Training_Data')
    mkdir(save_path)

    # A txt file with the corresponding name records whether a decoy is acceptable.
    decoy_names = sorted(name for name in os.listdir(file_path) if 'pdb' in name)
    label_names = sorted(name for name in os.listdir(file_path) if 'txt' in name)

    # Only the atom40 representation is built here; everything needed comes
    # from the pdb file itself.
    atom40_input = []
    atom40_output = []
    for position, decoy_name in enumerate(decoy_names):
        decoy_path = os.path.join(file_path, decoy_name)
        if position == 0:
            rcount = Get_Rcount(decoy_path)
        # receptor / ligand info: [x_coordinate, y_coordinate, z_coordinate, atom_type]
        rlist, llist = form_atom_list(decoy_path, rcount)
        # keep the interface area only (the type argument does not matter)
        rlist, llist = Form_interface(rlist, llist, 0)
        voxels, _, _ = reform_input(rlist, llist, 2)
        # four rotations are used as an example
        atom40_input.extend(voxels[:, :, :, :, k] for k in range(4))

        # expected output for this decoy
        label_path = os.path.join(file_path, label_names[position])
        with open(label_path, 'r') as label_file:
            atom40_output.append(int(label_file.readline().strip()))

    # Everything is saved in one file per array; with very large data sets
    # separate files may be needed instead, depending on available memory.
    atom40_input = np.array(atom40_input)
    atom40_output = np.array(atom40_output)
    np.save(os.path.join(save_path, 'trainset.npy'), atom40_input)
    np.save(os.path.join(save_path, 'aimset.npy'), atom40_output)
    return save_path
def main():
    """
    Parse the command line, prepare seeding / cloud data and start the workers.

    With ``args.multiprocessing_distributed`` one ``main_worker`` process is
    spawned per visible GPU; otherwise ``main_worker`` is called directly in
    this process.
    """
    args = parser.parse_args()

    seed = args.seed
    if seed is not None:
        random.seed(seed)
        torch.manual_seed(seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.world_size == -1 and args.dist_url == "env://":
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed
    params = vars(args)

    # On the cloud the data lives under /cache/; otherwise use the path as given.
    data_path = "/cache/" + args.data if args.cloud == 1 else args.data
    if args.cloud:
        import moxing as mox
        remote_ckpt = os.path.join(params['data_url'], args.pretrained)
        local_ckpt = os.path.join(data_path, args.pretrained)
        mkdir(data_path)
        mox.file.copy(remote_ckpt, local_ckpt)
        args.pretrained = local_ckpt

    ngpus_per_node = torch.cuda.device_count()
    if not args.multiprocessing_distributed:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
        return

    # Since we have ngpus_per_node processes per node, the total world_size
    # needs to be adjusted accordingly
    args.world_size = ngpus_per_node * args.world_size
    # Use torch.multiprocessing.spawn to launch distributed processes: the
    # main_worker process function
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
def run_goap(item1):
    """
    Run the GOAP executable on the decoys of one target and collect its output.

    The decoys are the entries of ``<cwd>/decoys/<item1>`` whose name starts
    with 'complex'. The GOAP resource files are copied from ``<cwd>/Goap``
    into that directory, an input file ``<item1>.inp`` is written there,
    ``./goap`` is run on it, and every resulting ``*_goap.pdb`` is moved to
    ``<cwd>/goapdecoy/<item1>``.

    The process working directory is changed and left pointing at the decoy
    directory.

    :param item1: name of the target sub-directory
    :return: None
    """
    start_dir = os.getcwd()
    goapset = os.path.join(start_dir, 'Goap')
    pathgenerate = os.path.join(os.path.join(start_dir, 'decoys'), item1)
    listdecoy = [name for name in os.listdir(pathgenerate) if name[0:7] == 'complex']

    # Copy files to the running directory
    os.chdir(goapset)
    resources = (
        'fort.21_1.61_2',
        'charge_inp.dat',
        'side_geometry.dat',
        'fort.31_g72_noshift5_new',
        'goap',
    )
    for resource in resources:
        os.system("cp " + resource + " " + pathgenerate + "/" + resource)

    pathgoapdecoy = os.path.join(os.path.join(start_dir, "goapdecoy"), item1)
    mkdir(pathgoapdecoy)
    os.chdir(pathgenerate)

    if not listdecoy:
        print('no decoys avilable')
        return
    print('waiting dealing' + str(len(listdecoy)))

    # Input file: GOAP directory on the first line, then one decoy per line.
    inp_name = str(item1) + '.inp'
    with open(inp_name, 'w') as inp_file:
        inp_file.write(goapset + '\n')
        for decoy in listdecoy:
            inp_file.write(str(decoy) + '\n')

    os.chdir(pathgenerate)
    os.system("./goap<" + inp_name)

    produced = [name for name in os.listdir(pathgenerate) if name[-9:] == '_goap.pdb']
    for name in produced:
        os.system("mv " + name + " " + os.path.join(pathgoapdecoy, name))
def __init__(self, root):
    """
    Set up the ``SVHN`` directory layout under *root* and process missing splits.

    Creates ``<root>/SVHN`` with the sub-directories ``trainset``, ``testset``
    and ``extraset``. A split is (re)processed through ``Process_Dataset``
    when ``os.path.getsize`` of its directory is below 10000.

    :param root: parent directory of the data set
    """
    self.root = root
    self.final_path = os.path.join(self.root, 'SVHN')
    mkdir(self.final_path)

    layout = (
        ('train', 'trainset'),
        ('test', 'testset'),
        ('extra', 'extraset'),
    )
    for split, folder in layout:
        setattr(self, split + '_path', os.path.join(self.final_path, folder))
    for split, _ in layout:
        mkdir(getattr(self, split + '_path'))

    for split, _ in layout:
        if os.path.getsize(getattr(self, split + '_path')) < 10000:
            self.Process_Dataset(getattr(self, split + '_path'), split)
def __init__(self, save_path):
    """
    Download the data if needed and prepare the ``cifar10`` directory layout.

    After ``download_init`` the integrity check decides whether the data has
    to be downloaded. ``<save_path>/cifar10/trainset`` and ``.../testset`` are
    then created, and a split is processed through ``Process_Dataset`` when
    ``os.path.getsize`` of its directory is below 10000.

    :param save_path: root directory of the data set
    """
    self.root = save_path
    self.download_init()
    intact = self._check_integrity()
    if not intact:
        mkdir(save_path)
        self.download()

    self.final_path = os.path.join(save_path, 'cifar10')
    mkdir(self.final_path)

    # generate npy files here
    self.train_path = os.path.join(self.final_path, 'trainset')
    self.test_path = os.path.join(self.final_path, 'testset')
    for folder in (self.train_path, self.test_path):
        mkdir(folder)

    for split in ('train', 'test'):
        if os.path.getsize(getattr(self, split + '_path')) < 10000:
            self.Process_Dataset(getattr(self, split + '_list'),
                                 getattr(self, split + '_path'))
def Generate_Logpath(lr, reg):
    """
    Create and return ``<cwd>/Train_record/lr_<lr>/reg_<reg>``.

    :param lr: learning rate
    :param reg: regularization
    :return: the log path
    """
    record_path = os.getcwd()
    for component in ('Train_record', 'lr_' + str(lr), 'reg_' + str(reg)):
        record_path = os.path.join(record_path, component)
        mkdir(record_path)
    return record_path
def __init__(self, root):
    """
    Set up the ``STL10`` directory layout under *root* and process missing splits.

    Creates ``<root>/STL10`` with the sub-directories ``trainset``, ``testset``
    and ``unlabelset``, downloads the data when the integrity check fails, and
    processes a split through ``Process_Dataset`` when the file
    ``trainset.npy`` inside its directory is missing or smaller than 10000 bytes.

    :param root: parent directory of the data set; ``~`` is expanded
    """
    self.root = os.path.expanduser(root)
    self.final_path = os.path.join(self.root, 'STL10')
    mkdir(self.final_path)

    self.train_path = os.path.join(self.final_path, 'trainset')
    self.test_path = os.path.join(self.final_path, 'testset')
    self.extra_path = os.path.join(self.final_path, 'unlabelset')
    for folder in (self.train_path, self.test_path, self.extra_path):
        mkdir(folder)

    if not self._check_integrity():
        self.download()

    for split in ('train', 'test', 'extra'):
        # NOTE(review): the same file name 'trainset.npy' is checked for every split.
        check_path = os.path.join(getattr(self, split + '_path'), 'trainset.npy')
        ready = os.path.exists(check_path) and os.path.getsize(check_path) >= 10000
        if not ready:
            self.Process_Dataset(getattr(self, split + '_path'), split)
def __init__(self, save_path, execute=True):
    """
    Prepare the ``Wikiart_Load`` directory layout and process missing splits.

    Nothing happens when *execute* is false. Otherwise
    ``<save_path>/Wikiart_Load/trainset`` and ``.../testset`` are created, and
    a split is processed through ``Process_Dataset`` when ``os.path.getsize``
    of its directory is below 1000000.

    :param save_path: root directory of the data set
    :param execute: when false, skip all set-up work
    """
    if not execute:
        return

    self.root = save_path
    self.download_init()
    self.final_path = os.path.join(save_path, 'Wikiart_Load')
    mkdir(self.final_path)

    # generate npy files here
    self.train_path = os.path.join(self.final_path, 'trainset')
    self.test_path = os.path.join(self.final_path, 'testset')
    for folder in (self.train_path, self.test_path):
        mkdir(folder)

    for split in ('train', 'test'):
        if os.path.getsize(getattr(self, split + '_path')) < 1000000:
            self.Process_Dataset(getattr(self, split + '_list'),
                                 getattr(self, split + '_path'))
def Evaluate_Haruspex(input_path, study_file_path, indicate, type):
    """
    Run the Haruspex prediction script on the maps selected by a study list.

    When *type* is not 3, *study_file_path* is a text file with one map id per
    line. When *type* is 3 it is a directory holding ``Fold1.txt`` ..
    ``Fold4.txt`` and the first five ids of each fold are used. Every entry of
    *input_path* whose first four characters match a selected id is predicted
    into ``<cwd>/Predict_Result/<indicate>/<id>``.

    The command is passed to ``subprocess.run`` as an argument list rather
    than a concatenated shell string, so paths containing spaces or shell
    metacharacters are handled correctly. A failed launch or a non-zero exit
    status is reported and processing continues with the next map.

    :param input_path: directory containing the input maps
    :param study_file_path: id list file, or fold directory when *type* is 3
    :param indicate: name of the result sub-directory
    :param type: 3 selects the fold-directory layout, anything else the single file
    :return: None
    """
    # Local import keeps this block self-contained.
    import subprocess

    # the list which we needs to get phase2 input
    if type != 3:
        test_id_list = _read_study_ids(study_file_path)
    else:
        test_id_list = []
        for k in range(1, 5):
            tmp_study_path = os.path.join(study_file_path, 'Fold' + str(k) + '.txt')
            test_id_list += _read_study_ids(tmp_study_path)[:5]
    print("We have %d maps to evaluate" % len(test_id_list))

    output_path = os.path.join(os.getcwd(), "Predict_Result")
    mkdir(output_path)
    output_path = os.path.join(output_path, indicate)
    mkdir(output_path)

    code_path = os.path.join(os.getcwd(), 'source', 'hpx_unet_190116.py')
    network_path = os.path.join(os.getcwd(), 'network', 'hpx_190116')
    wanted_ids = set(test_id_list)  # constant-time membership test per file
    for item in os.listdir(input_path):
        map_id = item[:4]
        if map_id not in wanted_ids:
            continue
        tmp_output_path = os.path.join(output_path, map_id)
        mkdir(tmp_output_path)
        tmp_map_path = os.path.join(input_path, item)
        run_cmd = [
            "python3", code_path,
            "-n", network_path,
            "-d", "map-predict", tmp_map_path,
            "-o", tmp_output_path,
        ]
        try:
            finished = subprocess.run(run_cmd)
        except OSError as err:
            # e.g. python3 is not on PATH; keep going like os.system did
            print("failed to launch prediction for %s: %s" % (item, err))
            continue
        if finished.returncode != 0:
            print("prediction for %s exited with status %d" % (item, finished.returncode))


def _read_study_ids(list_path):
    """Return every line of *list_path* with surrounding whitespace stripped."""
    with open(list_path, 'r') as file:
        return [line.strip() for line in file]
if type == 0: indicate = 'SIMU6' elif type == 1: indicate = 'SIMU10' elif type == 2: indicate = 'SIMU_MIX' elif type == 3: indicate = 'REAL' else: print( "we only have 4 type predictions: simulated(0,1,2) and experimental map(3)" ) exit() factor = 2 # reduce 4 to 2 to get more data save_path = os.path.join(os.getcwd(), 'Predict_Result') mkdir(save_path) save_path = os.path.join(save_path, indicate) mkdir(save_path) fold = params['fold'] # specify use which fold Model based on real map if type == 3: save_path = os.path.join(save_path, "Fold%d_Model_Result" % fold) mkdir(save_path) name_split = os.path.split(input_map) map_name = name_split[1] map_name = map_name.split(".")[0] save_path = os.path.join(save_path, map_name) mkdir(save_path) # reform the map voxel size to 1A instead of experimental voxel size from process_map.Reform_Map_Voxel import Reform_Map_Voxel, Reform_Map_Voxel_Final output_map = os.path.join(save_path, map_name + ".mrc") if type == 3:
def main_worker(gpu, ngpus_per_node, args):
    """
    Train and evaluate a linear classifier on top of a frozen backbone.

    Steps performed, in order:

    1. optional distributed initialisation (``dist.init_process_group``);
    2. model creation with every parameter frozen except ``fc.weight`` /
       ``fc.bias``, and optional loading of ``module.encoder_q.*`` weights
       from ``args.pretrained``;
    3. wrapping of the model for distributed / single-GPU / data-parallel use;
    4. optimizer creation and optional resume from ``args.resume``;
    5. data loader construction for the "ImageNet" or "Place205" data set
       (any other value prints a message and exits);
    6. either a single validation pass (``args.evaluate``) or the training
       loop with per-epoch validation and checkpointing.

    :param gpu: GPU index used by this process (stored into ``args.gpu``); may be None
    :param ngpus_per_node: number of GPUs per node, used for the global rank
        and for splitting batch size and worker count
    :param args: parsed argument namespace; it is modified in place
        (``gpu``, ``rank``, ``batch_size``, ``workers``, ``start_epoch``)
    :return: None
    """
    global best_acc1
    args.gpu = gpu
    params = vars(args)
    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:

        def print_pass(*args):
            pass

        # replaces the builtin print for this whole process
        builtins.print = print_pass
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    # 205 output classes for Place205, 1000 for everything else
    if args.dataset == "Place205":
        num_classes = 205
    else:
        num_classes = 1000
    model = models.__dict__[args.arch](num_classes=num_classes)
    # freeze all layers but the last fc
    for name, param in model.named_parameters():
        if name not in ['fc.weight', 'fc.bias']:
            param.requires_grad = False
    # init the fc layer
    model.fc.weight.data.normal_(mean=0.0, std=0.01)
    model.fc.bias.data.zero_()
    # load from pre-trained, before DistributedDataParallel constructor
    if args.pretrained:
        if os.path.isfile(args.pretrained):
            print("=> loading checkpoint '{}'".format(args.pretrained))
            checkpoint = torch.load(args.pretrained, map_location="cpu")
            state_dict = checkpoint['state_dict']
            # iterate over a copy of the keys because entries are added/removed
            for k in list(state_dict.keys()):
                # retain only encoder_q up to before the embedding layer
                if k.startswith('module.encoder_q'
                                ) and not k.startswith('module.encoder_q.fc'):
                    # remove prefix
                    state_dict[k[len("module.encoder_q."):]] = state_dict[k]
                # delete renamed or unused k
                del state_dict[k]
            args.start_epoch = 0
            msg = model.load_state_dict(state_dict, strict=False)
            # only the freshly initialised fc layer may be missing
            assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}
            print("=> loaded pre-trained model '{}'".format(args.pretrained))
        else:
            print("=> no checkpoint found at '{}'".format(args.pretrained))
    # optionally put a dropout layer in front of the classifier
    if args.dropout != 0.0:
        model.fc = nn.Sequential(nn.Dropout(args.dropout), model.fc)
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    # optimize only the linear classifier
    parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
    assert len(parameters) == 2  # fc.weight, fc.bias
    optimizer = torch.optim.SGD(parameters,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = torch.tensor(checkpoint['best_acc1'])
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    cudnn.benchmark = True
    # Data loading code
    if args.dataset == "ImageNet":
        data_path = args.data
        traindir = os.path.join(data_path, 'train')
        valdir = os.path.join(data_path, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        # training augmentation: strong, plain random crop, or random resized crop
        if args.train_strong:
            transform_train = transforms.Compose([
                transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
                transforms.RandomApply(
                    [
                        transforms.ColorJitter(0.4, 0.4, 0.4,
                                               0.1)  # not strengthened
                    ],
                    p=0.8),
                transforms.RandomGrayscale(p=0.2),
                transforms.RandomApply([GaussianBlur([.1, 2.])], p=0.5),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize
            ])
        elif args.randcrop:
            transform_train = transforms.Compose([
                transforms.RandomCrop(224, pad_if_needed=True),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
        else:
            transform_train = transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
        transform_test = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])
        train_dataset = datasets.ImageFolder(traindir, transform_train)
        val_dataset = datasets.ImageFolder(valdir, transform_test)
        if args.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                train_dataset)
            val_sampler = torch.utils.data.distributed.DistributedSampler(
                val_dataset, shuffle=True
            )  # different gpu forward individual based on its own statistics
            # val_sampler=None
        else:
            train_sampler = None
            val_sampler = None
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler)
        # NOTE(review): without a sampler the validation loader shuffles too.
        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            sampler=val_sampler,
            batch_size=args.batch_size,
            shuffle=(val_sampler is None),  # different gpu forward is different, thus it's necessary
            num_workers=args.workers,
            pin_memory=True)
    elif args.dataset == "Place205":
        from data_processing.Place205_Dataset import Places205
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        # four training pipelines: (strong | plain) x (random crop | random resized crop)
        if args.train_strong:
            if args.randcrop:
                transform_train = transforms.Compose([
                    transforms.RandomCrop(224),
                    transforms.RandomApply(
                        [
                            transforms.ColorJitter(0.4, 0.4, 0.4,
                                                   0.1)  # not strengthened
                        ],
                        p=0.8),
                    transforms.RandomGrayscale(p=0.2),
                    transforms.RandomApply([GaussianBlur([.1, 2.])], p=0.5),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    normalize
                ])
            else:
                transform_train = transforms.Compose([
                    transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
                    transforms.RandomApply(
                        [
                            transforms.ColorJitter(0.4, 0.4, 0.4,
                                                   0.1)  # not strengthened
                        ],
                        p=0.8),
                    transforms.RandomGrayscale(p=0.2),
                    transforms.RandomApply([GaussianBlur([.1, 2.])], p=0.5),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    normalize
                ])
        else:
            if args.randcrop:
                transform_train = transforms.Compose([
                    transforms.RandomCrop(224),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    normalize,
                ])
            else:
                transform_train = transforms.Compose([
                    transforms.RandomResizedCrop(224),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    normalize,
                ])
        # waiting to add 10 crop
        transform_valid = transforms.Compose([
            transforms.Resize([256, 256]),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])
        train_dataset = Places205(args.data, 'train', transform_train)
        valid_dataset = Places205(args.data, 'val', transform_valid)
        if args.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                train_dataset)
            val_sampler = torch.utils.data.distributed.DistributedSampler(
                valid_dataset, shuffle=False)
            # val_sampler = None
        else:
            train_sampler = None
            val_sampler = None
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler)
        val_loader = torch.utils.data.DataLoader(valid_dataset,
                                                 sampler=val_sampler,
                                                 batch_size=args.batch_size,
                                                 num_workers=args.workers,
                                                 pin_memory=True)
    else:
        print("your dataset %s is not supported for finetuning now" %
              args.dataset)
        exit()
    # evaluation-only mode: one validation pass, no training
    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return
    import datetime
    # log directory: <save_path>/<log_path>/Finetune_log/<yymmdd><HH:MM:SS>
    today = datetime.date.today()
    formatted_today = today.strftime('%y%m%d')
    now = time.strftime("%H:%M:%S")
    save_path = os.path.join(args.save_path, args.log_path)
    log_path = os.path.join(save_path, 'Finetune_log')
    mkdir(log_path)
    log_path = os.path.join(log_path, formatted_today + now)
    mkdir(log_path)
    # model_path=os.path.join(log_path,'checkpoint.pth.tar')
    # args.sgdr selects the schedule: 0 -> adjust_learning_rate each epoch,
    # 1 -> cosine annealing, 2 -> cosine annealing with warm restarts
    lr_scheduler = None
    if args.sgdr == 1:
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, 12)
    elif args.sgdr == 2:
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, args.sgdr_t0, args.sgdr_t_mult)
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        if args.sgdr == 0:
            adjust_learning_rate(optimizer, epoch, args)
        train(train_loader, model, criterion, optimizer, epoch, args,
              lr_scheduler)
        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)
        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        # only one process per node writes checkpoints
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            # add timestamp
            tmp_save_path = os.path.join(log_path, 'checkpoint.pth.tar')
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                filename=tmp_save_path)
            # additionally keep a per-epoch copy for the last 20 epochs
            if abs(args.epochs - epoch) <= 20:
                tmp_save_path = os.path.join(log_path,
                                             'model_%d.pth.tar' % epoch)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_acc1': best_acc1,
                        'optimizer': optimizer.state_dict(),
                    },
                    False,
                    filename=tmp_save_path)
def Get_log_path(params):
    """
    Create the nested, time-stamped log directory for one training run.

    The levels are, in order: the base log directory (``params['log_path']``
    under ``params['S']``, or under the current directory when ``S`` is
    empty), the data set name, a model folder chosen by ``params['type']``
    (no extra level when the type is not 0-5), the label portion, the
    learning rates and regularization, the five lambda weights, beta, and a
    ``yymmdd`` + ``HH:MM:SS`` time stamp. A ``model`` directory is created
    inside the final level.

    :param params: mapping with the keys 'lr', 'lr1', 'reg', 'type', 'lambda',
        'lambda1' .. 'lambda4', 'S', 'log_path', 'dataset', 'portion' and 'beta'
    :return: ``(log_path, result_path)`` - the time-stamped directory and its
        ``model`` sub-directory
    """
    learning_rate, learning_rate1, reg = params['lr'], params['lr1'], params['reg']
    model_type = params['type']
    lambdas = [
        params[key]
        for key in ('lambda', 'lambda1', 'lambda2', 'lambda3', 'lambda4')
    ]

    base_dir = os.getcwd() if params['S'] == '' else params['S']
    log_path = os.path.join(base_dir, params['log_path'])
    mkdir(log_path)
    log_path = os.path.join(log_path, params['dataset'])
    mkdir(log_path)

    # The position in this tuple is the value of params['type'].
    model_folders = (
        'MixMatch-5AETNEW',
        'MixMatch-5AETNEW-Wideresnet',
        'MixMatch-5AETNEW-Wideresnet500',
        'MixMatch-5AETNEW-Wideresnet1000',
        'MixMatch-5AETNEW-LargeWideresnet',
        'MixMatch-5AETNEW-Resnet152',
    )
    for code, folder in enumerate(model_folders):
        if model_type == code:
            log_path = os.path.join(log_path, folder)
            break
    mkdir(log_path)

    log_path = os.path.join(log_path, 'Label_portion' + str(params['portion']))
    mkdir(log_path)
    rate_tag = 'lr_' + str(learning_rate) + 'lr1_' + str(learning_rate1) + '_reg_' + str(reg)
    log_path = os.path.join(log_path, rate_tag)
    mkdir(log_path)
    log_path = os.path.join(log_path, 'lambda_' + '-'.join(str(value) for value in lambdas))
    mkdir(log_path)
    log_path = os.path.join(log_path, 'beta_' + str(params['beta']))
    mkdir(log_path)

    day_stamp = datetime.date.today().strftime('%y%m%d')
    clock_stamp = time.strftime("%H:%M:%S")
    log_path = os.path.join(log_path, day_stamp + clock_stamp)
    mkdir(log_path)

    result_path = os.path.join(log_path, 'model')
    mkdir(result_path)
    return log_path, result_path
def Prepare_Input_Singe(file_path, random_id):
    """
    Build the network inputs for a single decoy and save them as numpy files.

    A working directory named after the decoy file (minus its last four
    characters) plus *random_id* is created next to the decoy. The decoy is
    copied there as ``complex.<random_id>.pdb``, ``Gen_GOAP`` is run on the
    directory, and four arrays are saved in it: ``atom20.npy``,
    ``atom40.npy``, ``goap.npy`` and ``atomgoap.npy``.

    ITScore-based inputs are intentionally not generated (license
    restrictions); contact the authors for details.

    :param file_path: path of the decoy pdb file
    :param random_id: identifier used in the working directory and file names
    :return: path of the working directory holding the saved arrays
    """
    parent_dir, decoy_file = os.path.split(file_path)
    input_path = os.path.join(parent_dir, decoy_file[:-4] + str(random_id))
    mkdir(input_path)

    # Working copies: the decoy itself and the file GOAP produces for it.
    work_decoy1 = os.path.join(input_path, 'complex.' + str(random_id) + '.pdb')
    work_decoy2 = os.path.join(input_path, str(random_id) + '_goap.pdb')
    os.system('cp ' + file_path + ' ' + work_decoy1)

    # Restore the working directory after the GOAP step.
    pathroot = os.getcwd()
    Gen_GOAP(input_path)
    os.chdir(pathroot)

    # Atom-based inputs from the interface area.
    rcount = Get_Rcount(file_path)
    rlist, llist = form_atom_list(work_decoy1, rcount)
    rlist, llist = Form_interface(rlist, llist, 0)  # This type doesn't matter
    atom20_tmp, _, _ = reform_input(rlist, llist, 1)
    atom20_input = [atom20_tmp[:, :, :, :, k] for k in range(4)]
    atom40_tmp, _, _ = reform_input(rlist, llist, 2)
    atom40_input = [atom40_tmp[:, :, :, :, k] for k in range(4)]

    # GOAP-based input.
    rlist, llist = form_goap_list(work_decoy2, rcount)
    rlist, llist = Form_interface(rlist, llist, 0)
    goap_tmp, _, _ = reform_goap_input(rlist, llist, 5)
    goap_input = [goap_tmp[:, :, :, :, k] for k in range(4)]

    # Combined input: four atom channels followed by one GOAP channel.
    atomgoap_input = []
    for k in range(4):
        combined = np.zeros([20, 20, 20, 5])
        combined[:, :, :, 0:4] = atom40_tmp[:, :, :, :, k]
        combined[:, :, :, 4:5] = goap_tmp[:, :, :, :, k]
        atomgoap_input.append(combined)

    # Convert everything first, then save the result.
    outputs = [
        ('atom20.npy', np.array(atom20_input)),
        ('atom40.npy', np.array(atom40_input)),
        ('goap.npy', np.array(goap_input)),
        ('atomgoap.npy', np.array(atomgoap_input)),
    ]
    for file_name, array in outputs:
        np.save(os.path.join(input_path, file_name), array)
    return input_path
def Prepare_Input_List(file_path, random_id):
    """Build the numpy input arrays for every ``.pdb`` decoy in a directory.

    A working directory ``Dove<random_id>`` is created inside ``file_path``.
    Each ``*.pdb`` entry of ``file_path`` is copied there as
    ``complex.<index>.pdb`` (``index`` is the position of the entry in the
    sorted directory listing), fixed up with ``Correct_WrongFormat`` and the
    original file names are recorded in ``final_id.txt``. GOAP files are then
    generated with ``Gen_GOAP`` and four arrays are saved in the working
    directory: ``atom20.npy``, ``atom40.npy``, ``goap.npy`` and
    ``atomgoap.npy``.

    If ``final_id.txt`` already exists the directory is treated as prepared
    and nothing is recomputed.

    ITScore-based inputs (itscore / atomitscore / goapitscore /
    atomgoapitscore) are intentionally not produced: the original authors
    note that ITScore cannot be used for licensing reasons.

    :param file_path: directory that holds the decoy ``.pdb`` files
    :param random_id: job identifier used to name the working directory
    :return: path of the working directory
    """
    # Local import: replaces the previous shell ``cp`` invocation.
    import shutil

    input_path = os.path.join(file_path, 'Dove' + str(random_id))
    mkdir(input_path)
    save_path = input_path

    id_path = os.path.join(save_path, 'final_id.txt')
    if os.path.exists(id_path):
        return input_path

    # The listing is taken after the working directory has been created, so
    # the decoy indices below are positions in a listing that includes it.
    listfiles = sorted(os.listdir(file_path))

    final_id = []
    id_list = []
    for decoy_id, item in enumerate(listfiles):
        if not item.endswith('.pdb'):
            continue
        tmp_file_path = os.path.join(file_path, item)
        work_decoy1 = os.path.join(input_path,
                                   'complex.' + str(decoy_id) + '.pdb')
        # shutil.copy is safe for paths containing spaces or shell
        # metacharacters and raises on failure instead of failing silently.
        shutil.copy(tmp_file_path, work_decoy1)
        Correct_WrongFormat(work_decoy1)
        final_id.append(item)
        id_list.append(decoy_id)

    # Write final id record
    Write_List(id_path, final_id)

    # Generate the GOAP files. The working directory is restored afterwards
    # (the original code did the same, which suggests Gen_GOAP changes it);
    # try/finally makes sure that also happens when Gen_GOAP fails.
    pathroot = os.getcwd()
    try:
        Gen_GOAP(input_path)
    finally:
        os.chdir(pathroot)

    atom20_input = []
    atom40_input = []
    goap_input = []
    atomgoap_input = []

    for decoy_id in id_list:
        work_decoy1 = os.path.join(input_path,
                                   'complex.' + str(decoy_id) + '.pdb')
        work_decoy2 = os.path.join(input_path, str(decoy_id) + '_goap.pdb')

        # Atom-type based inputs
        rcount = Get_Rcount(work_decoy1)
        print("original receptor count %d" % rcount)
        rlist, llist = form_atom_list(work_decoy1, rcount)
        rlist, llist = Form_interface(rlist, llist, 0)

        tempload, _, _ = reform_input(rlist, llist, 1)
        # The last axis of the returned array is sliced four times; each
        # slice becomes its own sample.
        for k in range(4):
            atom20_input.append(tempload[:, :, :, :, k])

        atom40_tmp, _, _ = reform_input(rlist, llist, 2)
        for k in range(4):
            atom40_input.append(atom40_tmp[:, :, :, :, k])

        # GOAP based inputs
        print("goap decoy name ", work_decoy2)
        rcount = Get_Rcount2(work_decoy2)
        print("current receptor rcount %d" % rcount)
        rlist, llist = form_goap_list(work_decoy2, rcount)
        rlist, llist = Form_interface(rlist, llist, 0)
        goap_tmp, _, _ = reform_goap_input(rlist, llist, 5)
        for k in range(4):
            goap_input.append(goap_tmp[:, :, :, :, k])

        # Combined input: channels 0-3 come from atom40, channel 4 from GOAP.
        for k in range(4):
            atomgoap_tmp = np.zeros([20, 20, 20, 5])
            atomgoap_tmp[:, :, :, 0:4] = atom40_tmp[:, :, :, :, k]
            atomgoap_tmp[:, :, :, 4:5] = goap_tmp[:, :, :, :, k]
            atomgoap_input.append(atomgoap_tmp)

    # Save the result
    np.save(os.path.join(save_path, 'atom20.npy'), np.array(atom20_input))
    np.save(os.path.join(save_path, 'atom40.npy'), np.array(atom40_input))
    np.save(os.path.join(save_path, 'goap.npy'), np.array(goap_input))
    np.save(os.path.join(save_path, 'atomgoap.npy'),
            np.array(atomgoap_input))
    return input_path
def init_save_path(file_name):
    """Create and return ``<cwd>/predict_result/<file_name>``.

    Both the ``predict_result`` directory and the per-file sub-directory are
    passed to the project's ``mkdir`` helper, parent first.

    :param file_name: name of the sub-directory for this input
    :return: path of the per-file result directory
    """
    result_root = os.path.join(os.getcwd(), "predict_result")
    target_dir = os.path.join(result_root, file_name)
    for directory in (result_root, target_dir):
        mkdir(directory)
    return target_dir
def clf_predict(model, Overall_Segment_Array, imarray, save_path, params,
                origin_img_name):
    """
    Classify every segmented candidate of an image and write the results.

    :param model: trained model
    :param Overall_Segment_Array: segmented info array
    :param imarray: image array
    :param save_path: directory that receives all outputs
    :param params: configs; reads 'height', 'width', 'batch_size' and
        'num_workers'
    :param origin_img_name: image name
    :return: None (returns early when the number of extracted patches does
        not match the number of coordinates)
    """
    height = params['height']
    width = params['width']
    # Passed to SingleTestDataset; presumably per-channel normalisation
    # statistics -- confirm against the dataset class.
    norm_mean = (0.59187051, 0.53104666, 0.56797799)
    norm_std = (0.19646512, 0.23195337, 0.20233912)

    # Build_Coord_List yields array-based coordinates; swapping the two
    # components gives image-based coordinates.
    array_coords = Build_Coord_List(Overall_Segment_Array)
    coord_list = np.array([[point[1], point[0]] for point in array_coords])
    np.savetxt(os.path.join(save_path, 'Coord_Info.txt'), coord_list)
    print("DEBUG INFO: im array type", type(imarray))

    # Visualize the candidate image centers.
    Draw_Coord_Figure(os.path.join(save_path, "Coord_Info.png"), coord_list,
                      imarray, height, width)

    # Extract the candidate images that are fed into the network.
    feature_save_path = os.path.join(save_path, "input")
    mkdir(feature_save_path)
    count_image1 = prepare_input_image(imarray, feature_save_path, 0, height,
                                       width, coord_list, 0)
    if count_image1 != len(coord_list):
        print("Segmented part can not work, please have a check")
        return
    print("Successfully segmented image and saved!!!")

    # Wrap the extracted patches in a dataloader.
    patches = load_input_array(feature_save_path, count_image1)
    dataset = SingleTestDataset(patches, norm_mean, norm_std)
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=params['batch_size'],
        shuffle=False,
        num_workers=int(params['num_workers']),
        drop_last=False,
        pin_memory=True)

    # Run inference, then relabel and visualize the predictions.
    label_list = model_inference(model, loader, params, save_path,
                                 origin_img_name, coord_list)
    relabel_input_image(count_image1, feature_save_path, label_list)
    Visualize_Predict_Image(imarray, save_path, height, width, coord_list,
                            label_list)
    Visualize_Detail_Predict_Image(imarray, save_path, height, width,
                                   coord_list, label_list,
                                   Overall_Segment_Array)
def init_log_path(args):
    """
    Create the nested model/log directory for a run and return its path.

    Layout below the current working directory:
    ``<log_path>/<dataset>/Alpha_<alpha>/Aug_<aug_times>/lr_<lr>/cos_<cos>/``
    followed by a directory named with the date (``%y%m%d``) immediately
    followed by the time (``%H:%M:%S``).

    :param args: object providing log_path, dataset, alpha, aug_times, lr
        and cos attributes
    :return: save model+log path
    """

    def _levels():
        # Lazily yield each directory level so attributes and the clock are
        # read only when that level is about to be created.
        yield args.log_path
        yield args.dataset
        yield "Alpha_" + str(args.alpha)
        yield "Aug_" + str(args.aug_times)
        yield "lr_" + str(args.lr)
        yield "cos_" + str(args.cos)
        date_part = datetime.date.today().strftime('%y%m%d')
        yield date_part + time.strftime("%H:%M:%S")

    save_path = os.getcwd()
    for level in _levels():
        save_path = os.path.join(save_path, level)
        mkdir(save_path)
    return save_path
'-F', type=str, required=True, help='decoy example path') # File path for our MAINMAST code parser.add_argument( '--id', type=int, default=888, help='random id for the webserver notification, make sure corresponding' ) # Dense points part parameters args = parser.parse_args() params = vars(args) command_line = '/usr/bin/python3 main.py --mode=0 -F ' + str( params['F']) + ' --id=' + str(params['id']) + ' --gpu=5' #In default,we do not use gpu. log_path = os.path.join(os.getcwd(), 'log') file_path = os.path.abspath(params['F']) mkdir(log_path) split_lists = os.path.split(file_path) tmp_log_path = os.path.join( log_path, split_lists[1] + '_jobid' + str(params['id']) + '.txt') batch_file = os.path.join(log_path, 'slurm-job' + str(split_lists[1]) + '.sh') with open(batch_file, 'w') as file: file.write('#!/usr/bin/env bash\n') file.write('\n') file.write('#SBATCH -o ' + tmp_log_path + '\n') file.write('#SBATCH -p kihara-gpu\n') file.write('#SBATCH --cpus-per-task=1\n') file.write('#SBATCH --ntasks=1\n') file.write(command_line + '\n') os.system('sbatch ' + batch_file)