size = 300              #EDIT ME! #how large the images are
outType = "justbonds"   #EDIT ME! #what the CNN is predicting
#imdim = size - 20      #strip 10 pixels buffer from each side
direct = "../data/SDF/" #directory containing the SD files
ld = listdir(direct)    #contents of that directory
shuffle(ld)             #shuffle the image list for randomness
numEx = len(ld)         #number of images in the directory
DUMP_WEIGHTS = True     #will we dump the weights of conv layers for visualization
trainTestSplit = 0.90   #percentage of data to use as training data
batch_size = 32         #how many training examples per batch
chunkSize = 50000       #how much data to ever load at once
testChunkSize = 6000    #how many examples to evaluate per iteration
run = "1"

"""Define the folder where the model will be stored based on the input arguments"""
folder = helperFuncs.defineFolder(False, outType, size, run)
print folder

trainDirect = folder + "tempTrain/"
testDirect = folder + "tempTest/"

#if update:
#    stop = raw_input("Loading from folder "+folder+" : Hit enter to proceed or ctrl+C to cancel")
#else:
#    print "Initializing in folder "+folder

"""Load the train/test split information if update, else split and write out which images are in which dataset"""
trainFs, testFs = helperFuncs.getTrainTestSplit(False, folder, numEx, trainTestSplit, ld)
trainL = len(trainFs)
testL = len(testFs)

print "number of examples: ", numEx
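# The docstring above describes getTrainTestSplit's contract: when the update flag is
# False, split the shuffled file list and record which files went into each set; when it
# is True, reload the split written out by the first run.  A minimal sketch under that
# reading -- the real helper lives in helperFuncs.py, and the pickle filename here is
# hypothetical:
import cPickle as pickle

def getTrainTestSplit_sketch(update, folder, numEx, trainTestSplit, ld):
    if update:                                          #reuse the split from the original run
        with open(folder + "trainTestSplit.pickle", "rb") as f:
            trainFs, testFs = pickle.load(f)
        return trainFs, testFs
    cut = int(numEx * trainTestSplit)                   #e.g. first 90% of the shuffled list
    trainFs, testFs = ld[:cut], ld[cut:]
    with open(folder + "trainTestSplit.pickle", "wb") as f:  #record the split for later updates
        pickle.dump((trainFs, testFs), f)
    return trainFs, testFs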
# depth = 2    #size of the first receptive field
# run = ""

"""Define parameters of the run"""
imdim = size - 20       #strip 10 pixels buffer from each side
direct = "../data/images" + str(size) + "/"   #directory containing the images
ld = listdir(direct)    #contents of that directory
shuffle(ld)             #shuffle the image list for randomness
numEx = len(ld)         #number of images in the directory
outType = "ecfp"        #what the CNN is predicting
DUMP_WEIGHTS = True     #will we dump the weights of conv layers for visualization
trainTestSplit = 0.90   #percentage of data to use as training data
batch_size = 32         #how many training examples per batch
chunkSize = 50000       #how much data to ever load at once
testChunkSize = 6000    #how many examples to evaluate per iteration

"""Define the folder where the model will be stored based on the input arguments"""
folder = defineFolder(outType, size, lay1size, run)
#folder = "../ecfp/"+str(size)+"_"+str(lay1size)+run+"/"
#if not isdir(folder):
#    mkdir(folder)
#
#if (not UPDATE) and (isdir(folder)):
#    i=1
#    oldfolder = folder
#    while isdir(folder):
#        i+=1
#        folder = oldfolder[:-1]+"_"+str(i)+'/'
#    print folder
#    mkdir(folder)

if update:
    stop = raw_input("Loading from folder " + folder +
                     " : Hit enter to proceed or ctrl+C to cancel")
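# The commented-out block above hints at what defineFolder wraps: build a run-specific
# folder name from the output type, image size, first-layer size, and run tag, creating
# it and appending "_i" if a folder of that name already exists.  A minimal sketch under
# that assumption (the call signature varies between these scripts, so this is not the
# repo's actual implementation):
def defineFolder_sketch(outType, size, lay1size, run):
    from os import mkdir
    from os.path import isdir
    folder = "../" + outType + "/" + str(size) + "_" + str(lay1size) + run + "/"
    if not isdir(folder):
        mkdir(folder)
        return folder
    i = 1
    oldfolder = folder
    while isdir(folder):    #find an unused numeric suffix
        i += 1
        folder = oldfolder[:-1] + "_" + str(i) + '/'
    mkdir(folder)
    return folder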
"""Define parameters of the run""" imdim = size - 20 #strip 10 pixels buffer from each size direct = "../data/SDF/" #directory containing the SD files ld = listdir(direct) #contents of that directory shuffle(ld) #shuffle the image list for randomness numEx = len(ld) #number of images in the directory outType = "OCRfeatures" #what the CNN is predicting DUMP_WEIGHTS = True #will we dump the weights of conv layers for visualization trainTestSplit = 0.90 #percentage of data to use as training data batch_size = 32 #how many training examples per batch chunkSize = 50000 #how much data to ever load at once testChunkSize = 6000 #how many examples to evaluate per iteration """Define the folder where the model will be stored based on the input arguments""" folder = defineFolder(outType,size,lay1size,run,update) print folder trainDirect = folder+"tempTrain/" testDirect = folder+"tempTest/" #if update: # stop = raw_input("Loading from folder "+folder+" : Hit enter to proceed or ctrl+C to cancel") #else: # print "Initializing in folder "+folder """Load the train/test split information if update, else split and write out which images are in which dataset""" if update:
"""Require an argument specifying whether this is an update or a new model, parse input""" update, size, lay1size, run = helperFuncs.handleArgs(sys.argv) """Define parameters of the run""" imdim = size - 20 #strip 10 pixels buffer from each size direct = "../data/images" + str(size) + "/" #directory containing the images ld = listdir(direct) #contents of that directory numEx = len(ld) #number of images in the directory shuffle(ld) #shuffle the image list for randomness outType = "solubility" #what the CNN is predicting DUMP_WEIGHTS = True #will we dump the weights of conv layers for visualization trainTestSplit = 0.90 #percentage of data to use as training data batch_size = 32 #how many training examples per batch chunkSize = 50000 #how much data to ever load at once testChunkSize = 5000 #how many examples to evaluate per iteration """Define the folder where the model will be stored based on the input arguments""" folder = helperFuncs.defineFolder(outType, size, lay1size, run) """Load the train/test split information if update, else split and write out which images are in which dataset""" trainFs, testFs = helperFuncs.getTrainTestSplit(update, folder, numEx, trainTestSplit) trainL = len(trainFs) testL = len(testFs) print "number of examples: ", numEx print "training examples : ", trainL print "test examples : ", testL #batch_size = 32 #how many training examples per batch #chunkSize = 5000 #how much data to ever load at once #testChunkSize = 600 #how many examples to evaluate per iteration numTrainEx = min(trainL, chunkSize)
"""Define parameters of the run""" imdim = size - 20 #strip 10 pixels buffer from each size direct = "../data/images"+str(size)+"/" #directory containing the images ld = listdir(direct) #contents of that directory numEx = len(ld) #number of images in the directory shuffle(ld) #shuffle the image list for randomness outType = "solubility" #what the CNN is predicting DUMP_WEIGHTS = True #will we dump the weights of conv layers for visualization trainTestSplit = 0.90 #percentage of data to use as training data batch_size = 32 #how many training examples per batch chunkSize = 50000 #how much data to ever load at once testChunkSize = 5000 #how many examples to evaluate per iteration """Define the folder where the model will be stored based on the input arguments""" folder = helperFuncs.defineFolder(outType,size,lay1size,run) """Load the train/test split information if update, else split and write out which images are in which dataset""" trainFs, testFs = helperFuncs.getTrainTestSplit(update,folder,numEx,trainTestSplit) trainL = len(trainFs) testL = len(testFs) print "number of examples: ", numEx print "training examples : ", trainL print "test examples : ", testL #batch_size = 32 #how many training examples per batch #chunkSize = 5000 #how much data to ever load at once