shuffle(ld) num = 20000 preds = np.zeros((num, s), dtype=np.float) y = np.zeros((num, s), dtype=np.float) count = 0 for x in ld[:num]: CID = x[:x.find(".png")] y[count, :] = ecfps[CID] preds[count, :] = means count += 1 print "RMSE of guessing: ", np.sqrt(mean_squared_error(y, preds)) """Require an argument specifying whether this is an update or a new model, parse input""" update, size, lay1size, run = handleArgs(sys.argv) #if len(sys.argv) <= 1: # print "needs 'update' or 'new' as first argument" # sys.exit(1) # #if sys.argv[1].lower().strip() == "update": # UPDATE = True # if len(sys.argv) < 5: # print "needs image size, layer size, run # as other inputs" # sys.exit(1) # else: # size = int(sys.argv[2]) #size of the images # lay1size = int(sys.argv[3]) #size of the first receptive field # run = "_"+str(sys.argv[4].strip()) # print size, lay1size
# NOTE(review): whitespace-mangled fragment.  It begins mid-function — the
# enclosing `def` and `for` headers are outside this chunk — so its structure
# cannot be safely reconstructed here.  It is the Euclidean-distance variant
# of getRank (the cosine version is commented out token-by-token), followed by
# script-level setup: parse args, pick batch size, create the model folder and
# the tempTrain/tempTest working directories.  Left byte-identical below.
#cos = distance.cosine(vec, vec2) euc = distance.euclidean(vec,vec2) #row = [k, cos] row = [k,euc] tosort.append(row) data = np.array(tosort) #sCos = data[np.argsort(data[:,1])] sEuc = data[np.argsort(data[:,1])] #c = list(sCos[:,0]).index(cid) c = list(sEuc[:,0]).index(cid) return c, sEuc[:10] """Require an argument specifying whether this is an update or a new model, parse input""" size, run, outType = helperFuncs.handleArgs(sys.argv) """Define parameters of the run""" batch_size = 32 #how many training examples per batch """Define the folder where the model will be stored based on the input arguments""" folder = helperFuncs.defineFolder(True,outType,size,run) print folder trainDirect = folder+"tempTrain/" trainNP = folder+"tempTrainNP/" testDirect = folder+"tempTest/" testNP = folder+"tempTestNP/" """Load the train/test split information"""
shuffle(ld) num = 20000 preds = np.zeros((num,s),dtype=np.float) y = np.zeros((num,s),dtype=np.float) count = 0 for x in ld[:num]: CID = x[:x.find(".png")] y[count,:] = OCRfeatures[CID] preds[count,:] = means count+=1 print "RMSE of guessing: ", np.sqrt(mean_squared_error(y, preds)) """Require an argument specifying whether this is an update or a new model, parse input""" update, size, lay1size, run = handleArgs(sys.argv,size=300) """Define parameters of the run""" imdim = size - 20 #strip 10 pixels buffer from each size direct = "../data/SDF/" #directory containing the SD files ld = listdir(direct) #contents of that directory shuffle(ld) #shuffle the image list for randomness numEx = len(ld) #number of images in the directory outType = "OCRfeatures" #what the CNN is predicting DUMP_WEIGHTS = True #will we dump the weights of conv layers for visualization trainTestSplit = 0.90 #percentage of data to use as training data batch_size = 32 #how many training examples per batch chunkSize = 50000 #how much data to ever load at once testChunkSize = 6000 #how many examples to evaluate per iteration
preds = np.zeros((num, s), dtype=np.float) y = np.zeros((num, s), dtype=np.float) count = 0 for x in ld[:num]: CID = x[:x.find(".png")] y[count] = targets[CID] preds[count] = means count += 1 print "RMSE of guessing: ", np.sqrt(mean_squared_error(y, preds)) """*************************************************************************""" """*************************************************************************""" """Require an argument specifying whether this is an update or a new model, parse input""" update, size, lay1size, run = helperFuncs.handleArgs(sys.argv) """Define parameters of the run""" imdim = size - 20 #strip 10 pixels buffer from each size direct = "../data/images" + str(size) + "/" #directory containing the images ld = listdir(direct) #contents of that directory numEx = len(ld) #number of images in the directory shuffle(ld) #shuffle the image list for randomness outType = "solubility" #what the CNN is predicting DUMP_WEIGHTS = True #will we dump the weights of conv layers for visualization trainTestSplit = 0.90 #percentage of data to use as training data batch_size = 32 #how many training examples per batch chunkSize = 50000 #how much data to ever load at once testChunkSize = 5000 #how many examples to evaluate per iteration """Define the folder where the model will be stored based on the input arguments""" folder = helperFuncs.defineFolder(outType, size, lay1size, run) """Load the train/test split information if update, else split and write out which images are in which dataset"""
def getRank(cid,vec,allVec): tosort = [] for k,vec2 in allVec.iteritems(): cos = distance.cosine(vec, vec2) row = [k, cos] tosort.append(row) data = np.array(tosort) sCos = data[np.argsort(data[:,1])] c = list(sCos[:,0]).index(cid) return c, sCos[:10] """Require an argument specifying whether this is an update or a new model, parse input""" size, run, outType = helperFuncs.handleArgs(sys.argv) """Define parameters of the run""" batch_size = 32 #how many training examples per batch """Define the folder where the model will be stored based on the input arguments""" folder = helperFuncs.defineFolder(True,outType,size,run) print folder trainDirect = folder+"tempTrain/" trainNP = folder+"tempTrainNP/" testDirect = folder+"tempTest/" testNP = folder+"tempTestNP/" """Load the train/test split information"""
y = np.zeros((num,s),dtype=np.float) count = 0 for x in ld[:num]: CID = x[:x.find(".png")] y[count] = targets[CID] preds[count] = means count+=1 print "RMSE of guessing: ", np.sqrt(mean_squared_error(y, preds)) """*************************************************************************""" """*************************************************************************""" """Require an argument specifying whether this is an update or a new model, parse input""" update, size, lay1size, run = helperFuncs.handleArgs(sys.argv) """Define parameters of the run""" imdim = size - 20 #strip 10 pixels buffer from each size direct = "../data/images"+str(size)+"/" #directory containing the images ld = listdir(direct) #contents of that directory numEx = len(ld) #number of images in the directory shuffle(ld) #shuffle the image list for randomness outType = "solubility" #what the CNN is predicting DUMP_WEIGHTS = True #will we dump the weights of conv layers for visualization trainTestSplit = 0.90 #percentage of data to use as training data batch_size = 32 #how many training examples per batch chunkSize = 50000 #how much data to ever load at once testChunkSize = 5000 #how many examples to evaluate per iteration