Example no. 1
0
    # --- fragment: the enclosing def is outside this view ---
    # Baseline sanity check: "predict" the constant per-feature mean vector
    # (means) for every sampled image and report the RMSE against the true
    # ECFP fingerprints, so a trained model's error can be compared to guessing.
    # Assumes ld holds image filenames like "<CID>.png" -- TODO confirm caller.
    shuffle(ld)
    num = 20000
    # NOTE(review): np.float is just an alias for builtin float (removed in
    # NumPy 1.24+); acceptable for the NumPy era this Python 2 script targets.
    preds = np.zeros((num, s), dtype=np.float)
    y = np.zeros((num, s), dtype=np.float)
    count = 0
    for x in ld[:num]:
        CID = x[:x.find(".png")]     # filename minus ".png" is the compound ID
        y[count, :] = ecfps[CID]     # ground-truth fingerprint for this compound
        preds[count, :] = means      # constant mean-vector "prediction"
        count += 1

    print "RMSE of guessing: ", np.sqrt(mean_squared_error(y, preds))


"""Require an argument specifying whether this is an update or a new model, parse input"""
update, size, lay1size, run = handleArgs(sys.argv)

#if len(sys.argv) <= 1:
#    print "needs 'update' or 'new' as first argument"
#    sys.exit(1)
#
#if sys.argv[1].lower().strip() == "update":
#    UPDATE     = True
#    if len(sys.argv) < 5:
#        print "needs image size, layer size, run # as other inputs"
#        sys.exit(1)
#    else:
#        size = int(sys.argv[2])     #size of the images
#        lay1size = int(sys.argv[3]) #size of the first receptive field
#        run     = "_"+str(sys.argv[4].strip())
#        print size, lay1size
Example no. 2
0
        # --- fragment: the enclosing def (building tosort per candidate k)
        # starts outside this view ---
        #cos     = distance.cosine(vec, vec2)
        euc     = distance.euclidean(vec,vec2)   # euclidean variant of the (commented) cosine ranking
        #row     = [k, cos]        
        row     = [k,euc]
        tosort.append(row)
           
    # NOTE(review): np.array(tosort) coerces to a string dtype when the keys k
    # are strings, so argsort on data[:,1] compares the distances
    # *lexicographically* (e.g. "1e-05" > "0.5") -- likely a bug; confirm and
    # cast the distance column to float before sorting.
    data    = np.array(tosort)    
    #sCos    = data[np.argsort(data[:,1])]   
    sEuc    = data[np.argsort(data[:,1])]    # all candidates, nearest first
    #c   = list(sCos[:,0]).index(cid)
    c   = list(sEuc[:,0]).index(cid)         # rank of the query compound itself
    return c, sEuc[:10]                      # rank plus the 10 nearest rows


"""Require an argument specifying whether this is an update or a new model, parse input"""
size, run, outType     = helperFuncs.handleArgs(sys.argv)


"""Define parameters of the run"""
batch_size      = 32                        #how many training examples per batch


"""Define the folder where the model will be stored based on the input arguments"""
folder          = helperFuncs.defineFolder(True,outType,size,run)
print folder
trainDirect     = folder+"tempTrain/"
trainNP         = folder+"tempTrainNP/"
testDirect      = folder+"tempTest/"
testNP          = folder+"tempTestNP/"

"""Load the train/test split information"""
Example no. 3
0
    # --- fragment: the enclosing def is outside this view ---
    # Baseline sanity check: score the constant "always predict the mean"
    # strategy against the true OCR feature vectors for a sample of images,
    # giving an RMSE floor for the real model to beat.
    shuffle(ld)
    num     = 20000
    # NOTE(review): np.float is the builtin float alias (removed in NumPy 1.24+).
    preds   = np.zeros((num,s),dtype=np.float)
    y       = np.zeros((num,s),dtype=np.float)
    count   = 0
    for x in ld[:num]:
        CID     = x[:x.find(".png")]        # filename minus ".png" is the compound ID
        y[count,:]  = OCRfeatures[CID]      # ground-truth feature vector
        preds[count,:] = means              # constant mean "prediction"
        count+=1
   
    print "RMSE of guessing: ", np.sqrt(mean_squared_error(y, preds))


"""Require an argument specifying whether this is an update or a new model, parse input"""
update, size, lay1size, run     = handleArgs(sys.argv,size=300)


"""Define parameters of the run"""
imdim           = size - 20                         #strip 10 pixels buffer from each size
direct          = "../data/SDF/"            #directory containing the SD files
ld              = listdir(direct)                   #contents of that directory
shuffle(ld)                                 #shuffle the image list for randomness
numEx           = len(ld)                   #number of images in the directory
outType         = "OCRfeatures"             #what the CNN is predicting
DUMP_WEIGHTS    = True                      #will we dump the weights of conv layers for visualization
trainTestSplit  = 0.90                      #percentage of data to use as training data
batch_size      = 32                        #how many training examples per batch
chunkSize       = 50000                     #how much data to ever load at once      
testChunkSize   = 6000                      #how many examples to evaluate per iteration
Example no. 4
0
    # --- fragment: truncated above; num, s, ld, targets and means come from
    # the missing enclosing scope ---
    # Baseline: constant mean-vector predictions vs. the true targets, scored
    # by RMSE so model error can be compared against naive guessing.
    preds = np.zeros((num, s), dtype=np.float)
    y = np.zeros((num, s), dtype=np.float)
    count = 0
    for x in ld[:num]:
        CID = x[:x.find(".png")]    # filename minus ".png" is the compound ID
        y[count] = targets[CID]     # ground-truth target row
        preds[count] = means        # constant mean "prediction"
        count += 1

    print "RMSE of guessing: ", np.sqrt(mean_squared_error(y, preds))


"""*************************************************************************"""
"""*************************************************************************"""
"""Require an argument specifying whether this is an update or a new model, parse input"""
update, size, lay1size, run = helperFuncs.handleArgs(sys.argv)
"""Define parameters of the run"""
imdim = size - 20  #strip 10 pixels buffer from each size
direct = "../data/images" + str(size) + "/"  #directory containing the images
ld = listdir(direct)  #contents of that directory
numEx = len(ld)  #number of images in the directory
shuffle(ld)  #shuffle the image list for randomness
outType = "solubility"  #what the CNN is predicting
DUMP_WEIGHTS = True  #will we dump the weights of conv layers for visualization
trainTestSplit = 0.90  #percentage of data to use as training data
batch_size = 32  #how many training examples per batch
chunkSize = 50000  #how much data to ever load at once
testChunkSize = 5000  #how many examples to evaluate per iteration
"""Define the folder where the model will be stored based on the input arguments"""
folder = helperFuncs.defineFolder(outType, size, lay1size, run)
"""Load the train/test split information if update, else split and write out which images are in which dataset"""
Example no. 5
0
def getRank(cid,vec,allVec):
    """Rank compound `cid` among all compounds by cosine distance to `vec`.

    Parameters:
        cid    -- key of the query compound in `allVec` (a string ID).
        vec    -- the query vector (anything scipy's cosine distance accepts).
        allVec -- dict mapping compound ID -> reference vector.

    Returns:
        (c, top) where `c` is the 0-based rank of `cid` when all compounds are
        sorted by ascending cosine distance to `vec` (0 = nearest), and `top`
        is the first 10 rows of the sorted [id, distance] array (string dtype,
        as produced by np.array on mixed id/float rows).
    """
    tosort  = []
    # .items() instead of the Py2-only .iteritems(): equivalent here and
    # keeps the function runnable under Python 3 as well.
    for k,vec2 in allVec.items():
        cos     = distance.cosine(vec, vec2)
        row     = [k, cos]
        tosort.append(row)

    data    = np.array(tosort)
    # BUG FIX: np.array(tosort) coerces to a *string* dtype because the IDs are
    # strings, so argsorting data[:,1] directly compared the distances
    # lexicographically (e.g. "1e-05" > "0.5"), scrambling the ranking.
    # Cast the distance column back to float before sorting.
    sCos    = data[np.argsort(data[:,1].astype(float))]
    c   = list(sCos[:,0]).index(cid)
    return c, sCos[:10]


"""Require an argument specifying whether this is an update or a new model, parse input"""
size, run, outType     = helperFuncs.handleArgs(sys.argv)


"""Define parameters of the run"""
batch_size      = 32                        #how many training examples per batch


"""Define the folder where the model will be stored based on the input arguments"""
folder          = helperFuncs.defineFolder(True,outType,size,run)
print folder
trainDirect     = folder+"tempTrain/"
trainNP         = folder+"tempTrainNP/"
testDirect      = folder+"tempTest/"
testNP          = folder+"tempTestNP/"

"""Load the train/test split information"""
    # --- fragment: truncated above; num, s, ld, preds, targets and means come
    # from the missing enclosing scope ---
    y       = np.zeros((num,s),dtype=np.float)
    count   = 0
    for x in ld[:num]:
        CID     = x[:x.find(".png")]     # filename minus ".png" is the compound ID
        y[count]  = targets[CID]         # ground-truth target row
        preds[count] = means             # constant mean "prediction" (baseline)
        count+=1
   
    print "RMSE of guessing: ", np.sqrt(mean_squared_error(y, preds))


"""*************************************************************************"""
"""*************************************************************************"""

"""Require an argument specifying whether this is an update or a new model, parse input"""
update, size, lay1size, run     = helperFuncs.handleArgs(sys.argv)



"""Define parameters of the run"""
imdim   = size - 20                         #strip 10 pixels buffer from each size
direct  = "../data/images"+str(size)+"/"    #directory containing the images
ld      = listdir(direct)                   #contents of that directory
numEx   = len(ld)                           #number of images in the directory
shuffle(ld)                                 #shuffle the image list for randomness
outType = "solubility"                      #what the CNN is predicting
DUMP_WEIGHTS = True                         #will we dump the weights of conv layers for visualization
trainTestSplit   = 0.90                     #percentage of data to use as training data
batch_size      = 32                        #how many training examples per batch
chunkSize       = 50000                     #how much data to ever load at once      
testChunkSize   = 5000                      #how many examples to evaluate per iteration