# author: David Hurwitz # started: 8/17/19 # from NN_formatted_data import OneNNData, NUM_PIXELS_1D from NN_misc import PIXEL_LEN import random import numpy as np AtomRadiusSqr = PIXEL_LEN * PIXEL_LEN NumAtoms = 40 MidSlice = round(NUM_PIXELS_1D / 2) # make an empty OneNNData data = OneNNData() for ii in range(100): # get NumAtoms random coordinates. these are the true coordinates. atomPositions = data.GetRandomPositions2(NumAtoms, AtomRadiusSqr) atomTypes = ['C'] * NumAtoms # calculate the density map for atomPositions and put it on the NN output. data.MakeMap(atomPositions, atomTypes, False, AtomRadiusSqr, ClearData=True) # calculate the Patterson map from the NN output and put it on the NN input. this is the true Patterson map. # don't need to save the density map on the NN output any longer.
#----------------------------------------------------------- # FileListTrain = "sim_files_000_to_029.txt" # 000_to_004 or 000_to_029 # FileListValidate = "sim_files_030_to_034.txt" # 005_to_009 or 030_to_034 FileListTrain = "sim_files_000_to_000.txt" # 000_to_004 or 000_to_029 FileListValidate = "sim_files_000_to_000.txt" # 005_to_009 or 030_to_034 print("reading the training files listed in: " + CSV_Path + FileListTrain) allLinesTrain = AllLines() ReadFilesInList(allLinesTrain, CSV_Path, FileListTrain) print("reading the validation files list in: " + CSV_Path + FileListValidate) allLinesValidate = AllLines() ReadFilesInList(allLinesValidate, CSV_Path, FileListValidate) #------------------------------------------------------------------------------- # make a OneNNData to get array sizes for the NN #------------------------------------------------------------------------------- data = OneNNData() inShape = data.InData.shape outShape = data.OutData.shape #------------------------------------------------------------------------------- # make the Keras Functional API model. #------------------------------------------------------------------------------- input = Input(shape=data.InData.shape) L01a = Conv3D(20, 5, activation='relu', padding='same', kernel_initializer='he_normal')(input) L01b = Conv3D(20, 5, activation='relu',
allLinesValidate = AllLines() ReadFilesInList(allLinesValidate, CSV_Path, FileListValidate) #----------------------------------------------------------- # read the connection file for this molecule #----------------------------------------------------------- ConnectionFile = "C:/Users/david/Documents/newff/results/NN/simulations/mol006_sim000.connections.csv" print("reading connections file: " + ConnectionFile) Connections = ProteinConnections(ConnectionFile) TotalNumConnections = Connections.getTotalNumConnections() print("check: total num connections = " + str(TotalNumConnections)) #------------------------------------------------------------------------------- # make a OneNNData to get array sizes for the NN #------------------------------------------------------------------------------- data = OneNNData( SizedForNN=True) # default InData size is larger for extra workspace inShape = data.InData.shape outShape = data.OutData.shape #------------------------------------------------------------------------------- # make the Keras Functional API model. #------------------------------------------------------------------------------- # input shape is: (?, 100, 100, 100, 16) input = Input(shape=data.InData.shape) L01 = Conv3D(18, 5, activation='relu', padding='same', kernel_initializer='he_normal')(input) L02 = Conv3D(18, 5,
# get the batch arrays into a format that is suitable for the NN: [batchsize, nx, ny, nz, numChannels] (bigInputArray, bigOutputArray) = batchData.makeBigArrays() print("bigInputArray.shape = ", bigInputArray.shape) print("bigOutputArray.shape = ", bigOutputArray.shape) # check that the shape is right for the NN OneInput = bigInputArray[0] OneOutput = bigOutputArray[0] print("OneInput.shape = ", OneInput.shape) print("OneOutput.shape = ", OneOutput.shape) # leaving off here # create this OneNNData object so we can use its member functions data = OneNNData(SizedForNN=True) # look at one slice of one output density map at different atom-radii batchData.getOneItem(0).OutData = bigOutputArray[0] batchData.getOneItem(0).PrintSlice(2, 50, False) batchData.getOneItem(0).PrintHistogram(False, 20, 'test data') batchData.getOneItem(0).ReMakeMap(False, 1.50 * PIXEL_LEN) (bigInputArray2, bigOutputArray2) = batchData.makeBigArrays() batchData.getOneItem(0).OutData = bigOutputArray[0] batchData.getOneItem(0).PrintSlice(2, 50, False) batchData.getOneItem(0).PrintHistogram(False, 20, 'test data') batchData.getOneItem(0).ReMakeMap(False, 2.00 * PIXEL_LEN) (bigInputArray3, bigOutputArray3) = batchData.makeBigArrays() batchData.getOneItem(0).OutData = bigOutputArray[0]
allLinesValidate = AllLines() ReadFilesInList(allLinesValidate, CSV_Path, FileListValidate) #----------------------------------------------------------- # read the connection file for this molecule #----------------------------------------------------------- ConnectionFile = "C:/Users/david/Documents/newff/results/NN/simulations/mol006_sim000.connections.csv" print("reading connections file: " + ConnectionFile) Connections = ProteinConnections(ConnectionFile) TotalNumConnections = Connections.getTotalNumConnections() print("check: total num connections = " + str(TotalNumConnections)) #------------------------------------------------------------------------------- # make a OneNNData for general use. #------------------------------------------------------------------------------- data = OneNNData(SizedForNN=True) #------------------------------------------------------------------------------- # Append 8 extra channels to OneNNData::InData # Need 4 for EstimateAtomPositionsFromDensityMapsSuperAccurate # Need 8 for EstimateAtomPositionsFromDensityMapsSuperDuperAccurate #------------------------------------------------------------------------------- np_1d = NUM_PIXELS_1D ExtraSpace = np.zeros(shape=(np_1d, np_1d, np_1d, 8), dtype=float32) data.InData = np.append(data.InData, ExtraSpace, axis=3) #------------------------------------------------------------------------------- # make the first NN input for the trajectory #------------------------------------------------------------------------------- Batch = OneBatch(BatchSize) Batch.makeABatch(allLinesValidate,
from numpy import float32

# atom radius is one pixel length; keep its square and cube handy
AtomRadius = 1.0 * PIXEL_LEN
AtomRadiusSqr = AtomRadius * AtomRadius
AtomRadiusCubed = AtomRadiusSqr * AtomRadius

# which molecule / simulation files to load
molNum = 5
CSV_Path = "C:/Users/david/Documents/newff/results/NN/simulations/mol_05/csv_files/"
FileList = "sim_files_000_to_000.txt"  # 1 simulation file is in this list

# load every CSV file named in FileList (MD simulation data) into one
# AllLines container
Lines = AllLines()
ReadFilesInList(Lines, CSV_Path, FileList)

# a OneNNData with empty data arrays
data = OneNNData()

#---------------------------------
# test (x, y, z) -> (i, j, k)
#---------------------------------
# shorthand offsets and pixel indices
sn = SMALL_NUM
ps2 = PIXEL_LEN / 2
ps9 = 9 * PIXEL_LEN / 10
np1 = NUM_PIXELS_1D - 1
np2 = int(NUM_PIXELS_1D / 2)
np21 = np2 - 1

# test: a point just inside the minimum corner of the box maps to pixel (0,0,0)
Pos = (BOX_MIN + sn,) * 3
i, j, k = data.GetPixel(Pos)
assert (i, j, k) == (0, 0, 0)
def __init__(self, numInBatch):
    """Build a batch of `numInBatch` default-constructed OneNNData objects.

    The objects are kept, in creation order, in self.trainingExamples.
    """
    self.trainingExamples = [OneNNData() for _ in range(numInBatch)]
#----------------------------------------------------------- # FileListTrain = "sim_files_000_to_029.txt" # 000_to_004 or 000_to_029 # FileListValidate = "sim_files_030_to_034.txt" # 005_to_009 or 030_to_034 FileListTrain = "sim_files_000_to_000.txt" # 000_to_004 or 000_to_029 FileListValidate = "sim_files_030_to_030.txt" # 005_to_009 or 030_to_034 print("reading the training files listed in: " + CSV_Path + FileListTrain) allLinesTrain = AllLines() ReadFilesInList(allLinesTrain, CSV_Path, FileListTrain) print("reading the validation files list in: " + CSV_Path + FileListValidate) allLinesValidate = AllLines() ReadFilesInList(allLinesValidate, CSV_Path, FileListValidate) #------------------------------------------------------------------------------- # make a OneNNData to get array sizes for the NN #------------------------------------------------------------------------------- data = OneNNData() inShape = data.InData.shape outShape = data.OutData.shape #------------------------------------------------------------------------------- # make the Keras Functional API model. #------------------------------------------------------------------------------- input = Input(shape=data.InData.shape) L01a = Conv3D(20, 5, activation='relu', padding='same', kernel_initializer='he_normal')(input) L01b = Conv3D(20, 5, activation='relu',
(j, dir[0], dir[1], dir[2], mag)) aLine1 = aLine2 #----------------------------------------------------------- # read the connection file for this molecule #----------------------------------------------------------- ConnectionFile = "C:/Users/david/Documents/newff/results/NN/simulations/mol006_sim000.connections.csv" print("reading connections file: " + ConnectionFile) Connections = ProteinConnections(ConnectionFile) TotalNumConnections = Connections.getTotalNumConnections() print("check: total num connections = " + str(TotalNumConnections)) #------------------------------------------------------------------------------- # make a OneNNData to get array sizes for the NN #------------------------------------------------------------------------------- data = OneNNData( SizedForNN=True) # default InData size is larger for extra workspace inShape = data.InData.shape outShape = data.OutData.shape #------------------------------------------------------------------------------- # make a batch of formatted data for validation #------------------------------------------------------------------------------- validationBatch = OneBatch(ValidationBatchSize) validationBatch.makeABatch(allLinesValidate, Connections, InRadiusSqr, OutRadiusSqr, doRotation=True, batchType='validation', timeStepInterval=TimeStepInterval) (bigInputValidationArray,