size = 300              #EDIT ME! #how large the images are
outType = "justbonds"   #EDIT ME! #what the CNN is predicting
#imdim = size - 20      #strip 10 pixels buffer from each side
direct = "../data/SDF/" #directory containing the SD files
ld = listdir(direct)    #contents of that directory
shuffle(ld)             #shuffle the image list for randomness
numEx = len(ld)         #number of images in the directory
DUMP_WEIGHTS = True     #will we dump the weights of conv layers for visualization
trainTestSplit = 0.90   #percentage of data to use as training data
batch_size = 32         #how many training examples per batch
chunkSize = 50000       #how much data to ever load at once
testChunkSize = 6000    #how many examples to evaluate per iteration
run = "1"

"""Define the folder where the model will be stored based on the input arguments"""
folder = helperFuncs.defineFolder(False, outType, size, run)
print folder

trainDirect = folder + "tempTrain/"
testDirect = folder + "tempTest/"

#if update:
#    stop = raw_input("Loading from folder "+folder+" : Hit enter to proceed or ctrl+C to cancel")
#else:
#    print "Initializing in folder "+folder

"""Load the train/test split information if update, else split and write out which images are in which dataset"""
trainFs, testFs = helperFuncs.getTrainTestSplit(False, folder, numEx, trainTestSplit, ld)
trainL = len(trainFs)
testL = len(testFs)

print "number of examples: ", numEx
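# The docstring above describes getTrainTestSplit's contract: when the update flag is
# False, split the shuffled file list and record which files went into each set; when it
# is True, reload the split written out by the first run.  A minimal sketch under that
# reading -- the real helper lives in helperFuncs.py, and the pickle filename here is
# hypothetical:
import cPickle as pickle

def getTrainTestSplit_sketch(update, folder, numEx, trainTestSplit, ld):
    if update:                                          #reuse the split from the original run
        with open(folder + "trainTestSplit.pickle", "rb") as f:
            trainFs, testFs = pickle.load(f)
        return trainFs, testFs
    cut = int(numEx * trainTestSplit)                   #e.g. first 90% of the shuffled list
    trainFs, testFs = ld[:cut], ld[cut:]
    with open(folder + "trainTestSplit.pickle", "wb") as f:  #record the split for later updates
        pickle.dump((trainFs, testFs), f)
    return trainFs, testFs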
# depth = 2    #size of the first receptive field
# run = ""

"""Define parameters of the run"""
imdim = size - 20       #strip 10 pixels buffer from each side
direct = "../data/images" + str(size) + "/"   #directory containing the images
ld = listdir(direct)    #contents of that directory
shuffle(ld)             #shuffle the image list for randomness
numEx = len(ld)         #number of images in the directory
outType = "ecfp"        #what the CNN is predicting
DUMP_WEIGHTS = True     #will we dump the weights of conv layers for visualization
trainTestSplit = 0.90   #percentage of data to use as training data
batch_size = 32         #how many training examples per batch
chunkSize = 50000       #how much data to ever load at once
testChunkSize = 6000    #how many examples to evaluate per iteration

"""Define the folder where the model will be stored based on the input arguments"""
folder = defineFolder(outType, size, lay1size, run)
#folder = "../ecfp/"+str(size)+"_"+str(lay1size)+run+"/"
#if not isdir(folder):
#    mkdir(folder)
#
#if (not UPDATE) and (isdir(folder)):
#    i=1
#    oldfolder = folder
#    while isdir(folder):
#        i+=1
#        folder = oldfolder[:-1]+"_"+str(i)+'/'
#    print folder
#    mkdir(folder)

if update:
    stop = raw_input("Loading from folder " + folder +
                     " : Hit enter to proceed or ctrl+C to cancel")
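# The commented-out block above hints at what defineFolder wraps: build a run-specific
# folder name from the output type, image size, first-layer size, and run tag, creating
# it and appending "_i" if a folder of that name already exists.  A minimal sketch under
# that assumption (the call signature varies between these scripts, so this is not the
# repo's actual implementation):
def defineFolder_sketch(outType, size, lay1size, run):
    from os import mkdir
    from os.path import isdir
    folder = "../" + outType + "/" + str(size) + "_" + str(lay1size) + run + "/"
    if not isdir(folder):
        mkdir(folder)
        return folder
    i = 1
    oldfolder = folder
    while isdir(folder):    #find an unused numeric suffix
        i += 1
        folder = oldfolder[:-1] + "_" + str(i) + '/'
    mkdir(folder)
    return folder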
"""Define parameters of the run""" imdim = size - 20 #strip 10 pixels buffer from each size direct = "../data/SDF/" #directory containing the SD files ld = listdir(direct) #contents of that directory shuffle(ld) #shuffle the image list for randomness numEx = len(ld) #number of images in the directory outType = "OCRfeatures" #what the CNN is predicting DUMP_WEIGHTS = True #will we dump the weights of conv layers for visualization trainTestSplit = 0.90 #percentage of data to use as training data batch_size = 32 #how many training examples per batch chunkSize = 50000 #how much data to ever load at once testChunkSize = 6000 #how many examples to evaluate per iteration """Define the folder where the model will be stored based on the input arguments""" folder = defineFolder(outType,size,lay1size,run,update) print folder trainDirect = folder+"tempTrain/" testDirect = folder+"tempTest/" #if update: # stop = raw_input("Loading from folder "+folder+" : Hit enter to proceed or ctrl+C to cancel") #else: # print "Initializing in folder "+folder """Load the train/test split information if update, else split and write out which images are in which dataset""" if update:
"""Require an argument specifying whether this is an update or a new model, parse input""" update, size, lay1size, run = helperFuncs.handleArgs(sys.argv) """Define parameters of the run""" imdim = size - 20 #strip 10 pixels buffer from each size direct = "../data/images" + str(size) + "/" #directory containing the images ld = listdir(direct) #contents of that directory numEx = len(ld) #number of images in the directory shuffle(ld) #shuffle the image list for randomness outType = "solubility" #what the CNN is predicting DUMP_WEIGHTS = True #will we dump the weights of conv layers for visualization trainTestSplit = 0.90 #percentage of data to use as training data batch_size = 32 #how many training examples per batch chunkSize = 50000 #how much data to ever load at once testChunkSize = 5000 #how many examples to evaluate per iteration """Define the folder where the model will be stored based on the input arguments""" folder = helperFuncs.defineFolder(outType, size, lay1size, run) """Load the train/test split information if update, else split and write out which images are in which dataset""" trainFs, testFs = helperFuncs.getTrainTestSplit(update, folder, numEx, trainTestSplit) trainL = len(trainFs) testL = len(testFs) print "number of examples: ", numEx print "training examples : ", trainL print "test examples : ", testL #batch_size = 32 #how many training examples per batch #chunkSize = 5000 #how much data to ever load at once #testChunkSize = 600 #how many examples to evaluate per iteration numTrainEx = min(trainL, chunkSize)
"""Define parameters of the run""" imdim = size - 20 #strip 10 pixels buffer from each size direct = "../data/images"+str(size)+"/" #directory containing the images ld = listdir(direct) #contents of that directory numEx = len(ld) #number of images in the directory shuffle(ld) #shuffle the image list for randomness outType = "solubility" #what the CNN is predicting DUMP_WEIGHTS = True #will we dump the weights of conv layers for visualization trainTestSplit = 0.90 #percentage of data to use as training data batch_size = 32 #how many training examples per batch chunkSize = 50000 #how much data to ever load at once testChunkSize = 5000 #how many examples to evaluate per iteration """Define the folder where the model will be stored based on the input arguments""" folder = helperFuncs.defineFolder(outType,size,lay1size,run) """Load the train/test split information if update, else split and write out which images are in which dataset""" trainFs, testFs = helperFuncs.getTrainTestSplit(update,folder,numEx,trainTestSplit) trainL = len(trainFs) testL = len(testFs) print "number of examples: ", numEx print "training examples : ", trainL print "test examples : ", testL #batch_size = 32 #how many training examples per batch #chunkSize = 5000 #how much data to ever load at once