import os
import time
import logging
from time import gmtime, localtime, strftime

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.autograd import Variable

import model
import utils


def predict(M, N, inmodel):
    prediction_1 = np.zeros((N, N))
    for low_resolution_samples, index in utils.divide(M):
        batch_size = low_resolution_samples.shape[0]
        lowres_set = data.TensorDataset(
            torch.from_numpy(low_resolution_samples),
            torch.from_numpy(np.zeros(low_resolution_samples.shape[0])))
        try:
            lowres_loader = torch.utils.data.DataLoader(lowres_set, batch_size=batch_size, shuffle=False)
        except Exception:
            continue
        hires_loader = lowres_loader

        m = model.Net(40, 28)
        m.load_state_dict(torch.load(inmodel, map_location=torch.device('cpu')))
        if use_gpu and torch.cuda.is_available():
            m = m.cuda()

        for i, v1 in enumerate(lowres_loader):
            _lowRes, _ = v1
            _lowRes = Variable(_lowRes).float()
            if use_gpu and torch.cuda.is_available():
                _lowRes = _lowRes.cuda()
            y_prediction = m(_lowRes)

        y_predict = y_prediction.data.cpu().numpy()

        # Recombine the 28 x 28 predicted patches into the full N x N matrix.
        length = int(y_predict.shape[2])
        y_predict = np.reshape(y_predict, (y_predict.shape[0], length, length))
        for i in range(0, y_predict.shape[0]):
            x = int(index[i][1])
            y = int(index[i][2])
            prediction_1[x + 6:x + 34, y + 6:y + 34] = y_predict[i]
    return prediction_1
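# --- Assumed module-level configuration (sketch; values are illustrative, not taken from the original file) ---
# predict()/prediction()/train() reference several globals that the original module defines
# elsewhere: use_gpu, down_sample_ratio, HiC_max_value, conv2d1_filters_size,
# conv2d2_filters_size, conv2d3_filters_size, batch_size and epochs. The kernel sizes must
# satisfy conv2d1 + conv2d2 + conv2d3 - 3 = 12 so that the 40 -> 28 crop (half_padding = 6)
# used below comes out right, e.g. 9, 1 and 5. A minimal sketch of such a configuration:
#
# use_gpu = torch.cuda.is_available()
# down_sample_ratio = 16          # illustrative scaling applied to the down-sampled input
# HiC_max_value = 100             # illustrative cap on contact counts
# conv2d1_filters_size = 9
# conv2d2_filters_size = 1
# conv2d3_filters_size = 5
# batch_size = 256
# epochs = 3500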
def prediction(M, N, inmodel):
    low_resolution_samples, index = utils.divide(M)

    batch_size = low_resolution_samples.shape[0]
    lowres_set = data.TensorDataset(torch.from_numpy(low_resolution_samples),
                                    torch.from_numpy(np.zeros(low_resolution_samples.shape[0])))
    lowres_loader = torch.utils.data.DataLoader(lowres_set, batch_size=batch_size, shuffle=False)
    hires_loader = lowres_loader

    m = model.Net(40, 28)
    if torch.cuda.is_available() and use_gpu:
        m.load_state_dict(torch.load(inmodel))
        m = m.cuda()
    else:
        m.load_state_dict(torch.load(inmodel, map_location=torch.device('cpu')))

    for i, v1 in enumerate(lowres_loader):
        _lowRes, _ = v1
        _lowRes = Variable(_lowRes).float()
        if torch.cuda.is_available() and use_gpu:
            _lowRes = _lowRes.cuda()
        y_prediction = m(_lowRes)

    y_predict = y_prediction.data.cpu().numpy()

    # Recombine the predicted patches into the full matrix.
    length = int(y_predict.shape[2])
    y_predict = np.reshape(y_predict, (y_predict.shape[0], length, length))

    prediction_1 = np.zeros((N, N))
    for i in range(0, y_predict.shape[0]):
        x = int(index[i][1])
        y = int(index[i][2])
        prediction_1[x + 6:x + 34, y + 6:y + 34] = y_predict[i]

    # Trim the 6-bin border that the network cannot predict.
    prediction_2 = prediction_1[6:N - 6, 6:N - 6]
    return prediction_2
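# --- Usage sketch (not part of the original script) ---
# A minimal, hedged example of driving prediction() end to end on a synthetic
# contact map. The Poisson matrix and the model path are illustrative
# placeholders; the real pipeline passes a down-sampled Hi-C matrix and a state
# dict saved by train(). It assumes utils.divide() accepts any N x N numpy array.
def _demo_prediction(inmodel='model/test_epoch3400.pth', demo_N=200):
    # Build a symmetric, capped toy matrix shaped like the real input.
    demo_M = np.random.poisson(1.0, size=(demo_N, demo_N)).astype(np.float32)
    demo_M = np.minimum(HiC_max_value, demo_M + demo_M.T)
    enhanced = prediction(demo_M, demo_N, inmodel)
    print('enhanced map shape:', enhanced.shape)
    return enhanced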
def train(lowres, highres, outModel):
    low_resolution_samples = lowres.astype(np.float32) * down_sample_ratio
    high_resolution_samples = highres.astype(np.float32)

    low_resolution_samples = np.minimum(HiC_max_value, low_resolution_samples)
    high_resolution_samples = np.minimum(HiC_max_value, high_resolution_samples)

    # Crop the high-quality Hi-C samples to the network output size; they are the training targets.
    sample_size = low_resolution_samples.shape[-1]
    padding = conv2d1_filters_size + conv2d2_filters_size + conv2d3_filters_size - 3
    half_padding = padding // 2
    output_length = sample_size - padding

    Y = []
    for i in range(high_resolution_samples.shape[0]):
        no_padding_sample = high_resolution_samples[i][0][half_padding:(sample_size - half_padding),
                                                          half_padding:(sample_size - half_padding)]
        Y.append(no_padding_sample)
    Y = np.array(Y).astype(np.float32)
    print(low_resolution_samples.shape, Y.shape)

    lowres_set = data.TensorDataset(torch.from_numpy(low_resolution_samples),
                                    torch.from_numpy(np.zeros(low_resolution_samples.shape[0])))
    print(len(lowres_set))
    lowres_loader = torch.utils.data.DataLoader(lowres_set, batch_size=batch_size, shuffle=False)

    hires_set = data.TensorDataset(torch.from_numpy(Y), torch.from_numpy(np.zeros(Y.shape[0])))
    hires_loader = torch.utils.data.DataLoader(hires_set, batch_size=batch_size, shuffle=False)

    Net = model.Net(40, 28)
    if use_gpu:
        Net = Net.cuda()

    optimizer = optim.SGD(Net.parameters(), lr=0.0001)
    _loss = nn.MSELoss()
    Net.train()

    running_loss = 0.0
    running_loss_validate = 0.0
    reg_loss = 0.0

    # Write a log file to record the training process.
    with open('HindIII_train.txt', 'w') as log:
        for epoch in range(0, 3500):
            # Save the model weights every 100 epochs.
            if epoch % 100 == 0:
                torch.save(Net.state_dict(), './model/test_epoch' + str(epoch) + '.pth')
                print('saved epoch' + str(epoch))
            for i, (v1, v2) in enumerate(zip(lowres_loader, hires_loader)):
                # Skip the last (possibly smaller) batch.
                if i == len(lowres_loader) - 1:
                    continue
                _lowRes, _ = v1
                _highRes, _ = v2
                _lowRes = Variable(_lowRes)
                _highRes = Variable(_highRes).unsqueeze(1)
                if use_gpu:
                    _lowRes = _lowRes.cuda()
                    _highRes = _highRes.cuda()
                optimizer.zero_grad()
                y_prediction = Net(_lowRes)
                loss = _loss(y_prediction, _highRes)
                print(loss)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            print('-------', i, epoch, running_loss / i, strftime("%Y-%m-%d %H:%M:%S", gmtime()))
            log.write(str(epoch) + ', ' + str(running_loss / i) + ', ' + strftime("%Y-%m-%d %H:%M:%S", gmtime()) + '\n')
            running_loss = 0.0
            running_loss_validate = 0.0
def prediction(M, N, inmodel):
    low_resolution_samples, index = utils.divide(M)

    batch_size = low_resolution_samples.shape[0]
    lowres_set = data.TensorDataset(torch.from_numpy(low_resolution_samples),
                                    torch.from_numpy(np.zeros(low_resolution_samples.shape[0])))
    lowres_loader = torch.utils.data.DataLoader(lowres_set, batch_size=batch_size, shuffle=False)
    hires_loader = lowres_loader

    m = model.Net(40, 28)

    # The model file may be either a full training checkpoint (a dict carrying a
    # 'model_state_dict' key) or a bare state dict; handle both.
    if torch.cuda.is_available() and use_gpu:
        loaded_model = torch.load(inmodel)
    else:
        loaded_model = torch.load(inmodel, map_location=torch.device('cpu'))
    if 'model_state_dict' in loaded_model:
        m.load_state_dict(loaded_model['model_state_dict'])
    else:
        m.load_state_dict(loaded_model)
    if torch.cuda.is_available() and use_gpu:
        m = m.cuda()

    for i, v1 in enumerate(lowres_loader):
        _lowRes, _ = v1
        _lowRes = Variable(_lowRes).float()
        if torch.cuda.is_available() and use_gpu:
            _lowRes = _lowRes.cuda()
        y_prediction = m(_lowRes)

    y_predict = y_prediction.data.cpu().numpy()

    # Recombine the predicted patches into the full matrix.
    length = int(y_predict.shape[2])
    y_predict = np.reshape(y_predict, (y_predict.shape[0], length, length))

    # Offsets derived from the model's output size instead of the hard-coded 6/34.
    trunc_offset1 = int((40 - y_predict.shape[1]) / 2)
    trunc_offset2 = int(trunc_offset1 + y_predict.shape[1])

    prediction_1 = np.zeros((N, N))
    for i in range(0, y_predict.shape[0]):
        x = int(index[i][1])
        y = int(index[i][2])
        prediction_1[x + trunc_offset1:x + trunc_offset2, y + trunc_offset1:y + trunc_offset2] = y_predict[i]

    # Trim the border that the network cannot predict.
    prediction_2 = prediction_1[trunc_offset1:(N - trunc_offset1), trunc_offset1:(N - trunc_offset1)]
    return prediction_2
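# --- Checkpoint-format note (sketch, not part of the original script) ---
# The prediction() variant above accepts either a bare state dict (as written by the
# older train() via torch.save(Net.state_dict(), ...)) or a full checkpoint dict with a
# 'model_state_dict' key (as written by the checkpointed train() below). A small,
# hypothetical helper for converting a full checkpoint into a bare state dict; the
# paths are placeholders.
def _extract_state_dict(checkpoint_path, out_path):
    loaded = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    state_dict = loaded.get('model_state_dict', loaded)  # fall back to a bare state dict
    torch.save(state_dict, out_path)
    return out_path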
def train(lowres, highres, outModel, checkpoint_file):
    low_resolution_samples = lowres.astype(np.float32) * down_sample_ratio
    high_resolution_samples = highres.astype(np.float32)

    low_resolution_samples = np.minimum(HiC_max_value, low_resolution_samples)
    high_resolution_samples = np.minimum(HiC_max_value, high_resolution_samples)

    # Crop the high-quality Hi-C samples to the network output size; they are the training targets.
    sample_size = low_resolution_samples.shape[-1]
    padding = conv2d1_filters_size + conv2d2_filters_size + conv2d3_filters_size - 3
    half_padding = padding // 2
    output_length = sample_size - padding

    Y = []
    for i in range(high_resolution_samples.shape[0]):
        no_padding_sample = high_resolution_samples[i][0][half_padding:(sample_size - half_padding),
                                                          half_padding:(sample_size - half_padding)]
        Y.append(no_padding_sample)
    Y = np.array(Y).astype(np.float32)
    logging.debug('low_resolution_samples.shape: ' + str(low_resolution_samples.shape) + ', Y.shape: ' + str(Y.shape))

    lowres_set = data.TensorDataset(torch.from_numpy(low_resolution_samples),
                                    torch.from_numpy(np.zeros(low_resolution_samples.shape[0])))
    lowres_loader = torch.utils.data.DataLoader(lowres_set, batch_size=batch_size, shuffle=False)

    hires_set = data.TensorDataset(torch.from_numpy(Y), torch.from_numpy(np.zeros(Y.shape[0])))
    hires_loader = torch.utils.data.DataLoader(hires_set, batch_size=batch_size, shuffle=False)

    Net = model.Net(40, 28)
    if use_gpu:
        Net = Net.cuda()

    optimizer = optim.SGD(Net.parameters(), lr=0.00001)
    _loss = nn.MSELoss()

    running_loss = 0.0
    running_loss_validate = 0.0
    reg_loss = 0.0

    # Checkpoints are written under model/<current date>/.
    curDate = strftime("%Y-%m-%d", localtime())
    try:
        os.mkdir(os.getcwd() + '/model/' + curDate + '/')
    except FileExistsError:
        pass

    # Optionally resume from an earlier checkpoint.
    start_epoch = 0
    if checkpoint_file is not None:
        checkpoint = torch.load(checkpoint_file)
        for key in checkpoint:
            print(key)
        start_epoch = checkpoint['epoch']
        running_loss = checkpoint['loss']
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        Net.load_state_dict(checkpoint['model_state_dict'])

    Net.train()
    trainTimer = time.time()
    for epoch in range(start_epoch, epochs):
        for i, (v1, v2) in enumerate(zip(lowres_loader, hires_loader)):
            # Skip the last (possibly smaller) batch.
            if i == len(lowres_loader) - 1:
                continue
            _lowRes, _ = v1
            _highRes, _ = v2
            _lowRes = Variable(_lowRes)
            _highRes = Variable(_highRes).unsqueeze(1)
            if use_gpu:
                _lowRes = _lowRes.cuda()
                _highRes = _highRes.cuda()
            optimizer.zero_grad()
            y_prediction = Net(_lowRes)
            loss = _loss(y_prediction, _highRes)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        logging.info('TRAINING INFO:\n' + '-------' + str(i) + ' Epoch: ' + str(epoch) +
                     ' running_loss/i: ' + str(running_loss / i) +
                     '\n-----time: ' + strftime("%Y-%m-%d %H:%M:%S", localtime()))
        logging.debug('Training progress: ' + str(epoch / epochs) +
                      f' || Epoch: {epoch}, Total epochs: {epochs} || Training time elapsed: {time.time() - trainTimer}')
        running_loss = 0.0
        running_loss_validate = 0.0

        # Save a full checkpoint every 100 epochs.
        if epoch % 100 == 0:
            logging.debug(f'epoch[{epoch}]: saving model')
            torch.save({
                'epoch': epoch,
                'model_state_dict': Net.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': running_loss,
            }, 'model/' + curDate + '/' + outModel + str(epoch) + '.model')
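# --- Usage sketch (not part of the original script) ---
# A hedged example of launching training and later resuming from one of the
# checkpoints written above. The .npy paths and model name are illustrative
# placeholders; lowres and highres are expected as 4-D float arrays of shape
# (n_samples, 1, 40, 40), matching model.Net(40, 28).
def _demo_train(lowres_path='lowres_samples.npy', highres_path='highres_samples.npy'):
    lowres = np.load(lowres_path)
    highres = np.load(highres_path)
    # Fresh run: no checkpoint to resume from.
    train(lowres, highres, 'hicplus_demo', None)
    # Later, resume from (for example) the epoch-100 checkpoint saved by the run above:
    # train(lowres, highres, 'hicplus_demo',
    #       'model/' + strftime("%Y-%m-%d", localtime()) + '/hicplus_demo100.model')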