def initialize_model(model, device, load_save_file=False):
    """Load or freshly initialise the weights of ``model`` and move it to ``device``.

    Args:
        model: the ``torch.nn.Module`` to prepare.
        device: target device; it is compared via ``str(device).lower()``
            against ``'cpu'`` and finally passed to ``model.to``.
        load_save_file: path of a saved ``state_dict`` to restore. When falsy
            (the default) the parameters are initialised instead.

    Returns:
        The prepared model. When more than one CUDA device is visible and
        ``device`` is not the CPU, this is the model wrapped in
        ``nn.DataParallel`` rather than the object that was passed in.
    """
    fnm = whoami()
    print('{}: device:{}({}), toch.cuda.device_count():{}, load_save_file:{}'.
          format(fnm, device, type(device), torch.cuda.device_count(),
                 load_save_file))

    if load_save_file:
        # Deserialize onto the CPU first so that a checkpoint written on a
        # GPU machine can still be restored where that GPU is not available;
        # load_state_dict copies the values into the model's own tensors.
        state_dict = torch.load(load_save_file, map_location='cpu')
        model.load_state_dict(state_dict)
    else:
        for param in model.parameters():
            # 1-D parameters are left at their default initialisation.
            # (The original had a constant-0 init after this `continue`,
            # which was unreachable and has been removed.)
            if param.dim() == 1:
                continue
            nn.init.xavier_normal_(param)

    if 1 < torch.cuda.device_count() and str(device).lower() != 'cpu':
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # DataParallel splits the batch along dim 0,
        # e.g. [30, ...] -> [10, ...], [10, ...], [10, ...] on 3 GPUs.
        model = nn.DataParallel(model)

    model.to(device)
    return model
def set_cuda_visible_device(ngpus, max_gpus=8):
    """Find idle GPUs with ``nvidia-smi`` and return ``ngpus`` of their indices.

    Each GPU index in ``range(max_gpus)`` is queried with
    ``nvidia-smi -i <index>``. A GPU counts as idle when exactly one line of
    the report contains "No running" (case-insensitive).

    Args:
        ngpus: number of idle GPUs required.
        max_gpus: how many GPU indices (starting at 0) to probe.

    Returns:
        The first ``ngpus`` idle indices, each followed by a comma
        (for example ``'0,2,'``); an empty string when ``ngpus`` is 0.

    Raises:
        SystemExit: with code -1 when fewer than ``ngpus`` idle GPUs are found.
    """
    import subprocess
    import sys

    idle = []
    for index in range(max_gpus):
        try:
            # Argument list instead of a shell pipeline: no shell parsing and
            # no dependency on grep/wc being installed.
            result = subprocess.run(
                ['nvidia-smi', '-i', str(index)],
                stdout=subprocess.PIPE,
                check=False,
            )
        except OSError:
            # nvidia-smi cannot be executed at all, so no index can be idle.
            break
        report = result.stdout.decode('utf-8', errors='replace')
        n_hits = sum(
            1 for line in report.splitlines() if 'no running' in line.lower())
        if n_hits == 1:
            idle.append(index)

    if len(idle) < ngpus:
        print('avaliable gpus are less than required')
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. under `python -S`).
        sys.exit(-1)

    # Keep the historical format: every index is followed by a comma.
    return ''.join(str(idle[i]) + ',' for i in range(ngpus))
def __init__(self, keys, data_dir):
    """Remember the sample keys and data directory and reset all counters.

    Args:
        keys: sequence of sample keys; it is sliced and measured with
            ``len`` for the start-up log line.
        data_dir: directory that the keys are resolved against later.
    """
    fnm = '.'.join([__class__.__name__, whoami()])
    print('{}:len(keys):{}, keys[:5]:{}, data_dir:{}'.format(
        fnm, len(keys), keys[:5], data_dir))

    self.keys, self.data_dir = keys, data_dir

    # The one-off diagnostic print has not been emitted yet.
    self.proc_info_printed = False

    # Usage statistics, all starting from zero.
    self.n_queried = self.n_file_opened = 0
    self.n_max_n1 = self.n_max_n2 = self.n_max_adj = 0
def __init__(self, weights, n_samples, replacement=True):
    """Store sampling weights normalised to sum to one.

    Args:
        weights: per-item weights; divided by their sum before being stored.
        n_samples: stored unchanged as ``self.n_samples``.
        replacement: stored unchanged as ``self.replacement``.
    """
    fnm = '.'.join([__class__.__name__, whoami()])

    # Log the raw weights before normalisation.
    raw_total = np.sum(weights)
    print(
        '{}:#1: np.sum(weights):{}, weights.shape:{}, n_samples:{}'.format(
            fnm, raw_total, len(weights), n_samples))

    # Log again after normalisation.
    normalized = np.array(weights) / raw_total
    print(
        '{}:#2: np.sum(weights):{}, weights.shape:{}, n_samples:{}'.format(
            fnm, np.sum(normalized), normalized.shape, n_samples))

    self.weights = normalized
    self.n_samples = n_samples
    self.replacement = replacement
def __getitem__(self, idx):
    """Load one pickled molecule pair and build its fixed-size graph sample.

    The file ``data_dir/keys[idx]`` is unpickled into two molecules
    (commented in the original as ligand and protein). Their adjacency
    matrices, with self-loops added, are zero-padded or truncated to
    ``N_PADDED_LIGAND`` / ``N_PADDED_PROTEIN`` and placed on the diagonal of
    an ``N_PADDED_ALL`` square matrix.

    Returns:
        dict with keys
        ``'H'``   - ligand and protein atom features concatenated on axis 0,
        ``'A1'``  - block-diagonal padded adjacency matrix,
        ``'A2'``  - ``A1`` plus the ligand-protein distance matrix in the
                    off-diagonal blocks,
        ``'Y'``   - 1 when the key contains ``'CHEMBL'``, otherwise 0,
        ``'V'``   - vector that is 1 for the first ``N_PADDED_LIGAND``
                    entries and 0 elsewhere,
        ``'key'`` - the sample key.
    """
    fnm = '.'.join([__class__.__name__, whoami()])
    self.n_queried += 1

    key = self.keys[idx]
    data_file_path = os.path.join(self.data_dir, key)
    # NOTE(review): pickle.load can execute arbitrary code; only use data
    # files from a trusted source.
    with open(data_file_path, 'rb') as fh:
        ligand, protein = pickle.load(fh)
        self.n_file_opened += 1

    if not self.proc_info_printed:
        print('{}: data_file_path:{}, type(m1):{}, type(m2):{}'.format(
            fnm, data_file_path, type(ligand), type(protein)))

    def padded_adjacency(mol, n_atoms, size):
        # Adjacency with self-loops, forced to a (size, size) matrix.
        with_self_loops = GetAdjacencyMatrix(mol) + np.eye(n_atoms)
        if n_atoms > size:
            return with_self_loops[:size, :size]
        padded = np.zeros((size, size), dtype=np.float64)
        padded[:n_atoms, :n_atoms] = with_self_loops
        return padded

    #
    # prepare ligand
    #
    n1 = ligand.GetNumAtoms()
    # Original author's note: GetConformers() returns only a single
    # rdkit.Chem.rdchem.Conformer object here.
    d1 = np.array(ligand.GetConformers()[0].GetPositions())
    adj1 = padded_adjacency(ligand, n1, N_PADDED_LIGAND)
    H1 = get_atom_feature(ligand, n1, True)

    #
    # prepare protein
    #
    n2 = protein.GetNumAtoms()
    d2 = np.array(protein.GetConformers()[0].GetPositions())
    adj2 = padded_adjacency(protein, n2, N_PADDED_PROTEIN)
    H2 = get_atom_feature(protein, n2, False)

    # aggregation
    H = np.concatenate([H1, H2], axis=0)

    agg_adj1 = np.zeros((N_PADDED_ALL, N_PADDED_ALL))
    agg_adj1[:N_PADDED_LIGAND, :N_PADDED_LIGAND] = adj1
    agg_adj1[N_PADDED_LIGAND:, N_PADDED_LIGAND:] = adj2
    agg_adj2 = agg_adj1.copy()

    # 2020-03-27 (original author's note, translated):
    #   * for computational convenience, a matrix of the (assumed) maximum
    #     size is crudely pre-filled with a fixed value;
    #   * entries without distance information were once set to a far
    #     distance (100.0) and are now simply set to 0.
    dm_padded = np.zeros((N_PADDED_LIGAND_MAX, N_PADDED_PROTEIN_MAX),
                         dtype=np.float64)
    dm_padded[:n1, :n2] = distance_matrix(d1, d2)
    dm = dm_padded[:N_PADDED_LIGAND, :N_PADDED_PROTEIN]
    agg_adj2[:N_PADDED_LIGAND, N_PADDED_LIGAND:] = dm
    agg_adj2[N_PADDED_LIGAND:, :N_PADDED_LIGAND] = dm.T

    # node indices for aggregation: the ligand slots are flagged with 1
    valid = np.zeros((N_PADDED_ALL, ))
    valid[:N_PADDED_LIGAND] = 1

    # class label derived from the key
    Y = 1 if 'CHEMBL' in key else 0

    sample = {
        'H': H,
        'A1': agg_adj1,
        'A2': agg_adj2,
        'Y': Y,
        'V': valid,
        'key': key,
    }

    # Track the largest atom counts seen so far.
    self.n_max_n1 = max(self.n_max_n1, n1)
    self.n_max_n2 = max(self.n_max_n2, n2)
    self.n_max_adj = max(self.n_max_adj, n1 + n2)

    if not self.proc_info_printed:
        print(
            '{}: n1:{}, n2:{}, H.shape:{}, adj1.shape:{}, adj2.shape:{}, type(Y):{}, type(valid):{}, type(key):{}:{}'
            .format(fnm, n1, n2, H.shape, adj1.shape, adj2.shape, type(Y),
                    type(valid), type(key), key))

    # The diagnostics above are only printed for the first item served.
    self.proc_info_printed = True
    return sample