def CodeChefExperiment(problem='MNMX'):
    """Write the train/CV/test network files and the settings file for a problem.

    For each split, the JSON file named by the ``CodeChef`` database object is
    passed to ``constructNetFromJson`` together with a freshly opened X file
    (binary mode) and Y file (text mode) located under ``params.xypath``.
    The value returned for each split is stored in the settings file as
    ``numtrain`` / ``numcv`` / ``numtest`` (presumably the number of instances
    written -- confirm against ``constructNetFromJson``).

    tokenvec.txt and toktypeDict.txt must be put in the data directory.

    ``datapath``, ``numOut`` and ``paramFile`` are not defined in this
    function; they are read from the enclosing module scope.

    :param problem: problem name handed to ``CodeChef`` and written as the
        ``database`` setting.  The original comment lists MNMX, FLOW016 and
        SUBINC as values.
    """
    xypath = params.xypath
    dtb = CodeChef(problem=problem)

    jsonNames = {
        'train': dtb.getTrainName(),
        'CV': dtb.getCVName(),
        'test': dtb.getTestName(),
    }
    # Single-argument print() emits the same text whether it is parsed as the
    # Python 2 print statement or the Python 3 print function.
    print('Construct net from: %s %s %s'
          % (jsonNames['train'], jsonNames['CV'], jsonNames['test']))

    # File names relative to xypath; the settings file stores them in this
    # relative form, while the files themselves are created under xypath.
    folds = ('train', 'CV', 'test')
    xNames = {}
    yNames = {}
    for fold in folds:
        xNames[fold] = dtb.problem + '_gpcnn_' + fold + '_Xnet'
        yNames[fold] = dtb.problem + '_gpcnn_' + fold + '_Y.txt'

    numInst = {}
    # Fixed tuple order keeps the processing order deterministic.
    for fold in folds:
        # 'with' guarantees both handles are closed even if the construction
        # step raises part-way through.
        with open(xypath + xNames[fold], 'wb') as f_x, \
                open(xypath + yNames[fold], 'w') as f_y:
            numInst[fold] = constructNetFromJson(
                jsonFile=datapath + jsonNames[fold], f_x=f_x, f_y=f_y)

    # write setting content
    print('setting parameters:')
    commonFunctions.generateSettingContent(
        xypath + '../settings_' + dtb.problem + '.txt',
        {
            'numtrain': numInst['train'],
            'numcv': numInst['CV'],
            'numtest': numInst['test'],
            'output': numOut,
            'paramFile': paramFile,
            'xtrain': xNames['train'],
            'xcv': xNames['CV'],
            'xtest': xNames['test'],
            'ytrain': yNames['train'],
            'ycv': yNames['CV'],
            'ytest': yNames['test'],
            'database': problem
        })
def saveXYForKFoldTrainCVTest(K=5, datapath='', xypath=''):
    """Write train/CV/test X and Y files plus a settings file for each fold.

    For every fold index ``1..K`` and every split in (train, CV, test), the
    input ``<datapath>Fold<i>_cfg_<split>`` is passed to
    ``constructNetFromJson`` together with the opened outputs
    ``<xypath>Fold<i>_X<split>`` (binary mode) and
    ``<xypath>Fold<i>_Y<split>.txt`` (text mode).  The returned values are
    stored as ``numtrain`` / ``numcv`` / ``numtest`` in
    ``<xypath>../settings_Fold<i>.txt`` (presumably instance counts --
    confirm against ``constructNetFromJson``).

    ``numOut`` and ``paramFile`` are read from the enclosing module scope.

    :param K: number of folds; folds are numbered from 1 to K inclusive.
    :param datapath: prefix prepended to every input file name.
    :param xypath: prefix prepended to every output file name.
    """
    # Single-argument print() emits the same text whether it is parsed as the
    # Python 2 print statement or the Python 3 print function.
    print('datapath: %s' % datapath)
    print('xypath: %s' % xypath)

    for idx in range(1, K + 1):
        foldName = 'Fold' + str(idx)

        counts = {}
        for split in ('train', 'CV', 'test'):
            xfile = xypath + foldName + '_X' + split
            yfile = xypath + foldName + '_Y' + split + '.txt'
            # 'with' closes both handles even when the construction step
            # raises, and avoids closing the same handle twice.
            with open(xfile, 'wb') as f_x, open(yfile, 'w') as f_y:
                counts[split] = constructNetFromJson(
                    datapath + foldName + '_cfg_' + split, f_x=f_x, f_y=f_y)

        # write setting file
        commonFunctions.generateSettingContent(
            xypath + '../settings_' + foldName + '.txt',
            {
                'numtrain': counts['train'],
                'numcv': counts['CV'],
                'numtest': counts['test'],
                'output': numOut,
                'paramFile': paramFile,
                'xtrain': foldName + '_Xtrain',
                'xcv': foldName + '_XCV',
                'xtest': foldName + '_Xtest',
                'ytrain': foldName + '_Ytrain.txt',
                'ycv': foldName + '_YCV.txt',
                'ytest': foldName + '_Ytest.txt',
                'database': 'F' + str(idx)
            })
def saveXYForKFold(K=5, datapath='', xypath=''):
    """Write per-fold test and merged-training X/Y files plus settings files.

    For each fold index ``i`` in ``1..K``:

    * the test files ``Fold<i>_Xtest`` / ``Fold<i>_Ytest.txt`` are built from
      ``<datapath>Fold<i>/dataFold<i>``;
    * the training files ``Fold<i>_Xtrain`` / ``Fold<i>_Ytrain.txt`` are built
      by feeding every other fold's data file into the same pair of open
      handles, summing the values returned by ``constructNetFromJson``;
    * ``<xypath>../settings_Fold<i>.txt`` is written.  The CV entries of the
      settings reuse the training count and training file names.

    ``numOut`` and ``paramFile`` are read from the enclosing module scope.

    NOTE(review): a later definition named ``saveXYForKFold(K, path)`` exists
    further down in this file; since Python rebinds the name at definition
    time, that later version is the one callers get unless one of the two is
    renamed or removed.

    :param K: number of folds; folds are numbered from 1 to K inclusive.
    :param datapath: prefix prepended to every input file name.
    :param xypath: prefix prepended to every output file name.
    """
    # Single-argument print() emits the same text whether it is parsed as the
    # Python 2 print statement or the Python 3 print function.
    print('datapath: %s' % datapath)
    print('xypath: %s' % xypath)

    for idx in range(1, K + 1):
        foldName = 'Fold' + str(idx)

        # write X, Y test from current fold
        with open(xypath + foldName + '_Xtest', 'wb') as f_x, \
                open(xypath + foldName + '_Ytest.txt', 'w') as f_y:
            numtest = constructNetFromJson(
                datapath + foldName + '/dataFold' + str(idx),
                f_x=f_x, f_y=f_y)

        # Merge other folds, write X, Y for training.  The handles are closed
        # (and therefore flushed) before the settings file is written, and
        # are released even if a construction step raises.
        numtrain = 0
        with open(xypath + foldName + '_Xtrain', 'wb') as f_x, \
                open(xypath + foldName + '_Ytrain.txt', 'w') as f_y:
            for idx_train in range(1, K + 1):
                if idx_train == idx:
                    continue
                numtrain += constructNetFromJson(
                    datapath + 'Fold' + str(idx_train)
                    + '/dataFold' + str(idx_train),
                    f_x=f_x, f_y=f_y)

        # write setting file
        commonFunctions.generateSettingContent(
            xypath + '../settings_' + foldName + '.txt',
            {
                'numtrain': numtrain,
                'numcv': numtrain,
                'numtest': numtest,
                'output': numOut,
                'paramFile': paramFile,
                'xtrain': foldName + '_Xtrain',
                'xcv': foldName + '_Xtrain',
                'xtest': foldName + '_Xtest',
                'ytrain': foldName + '_Ytrain.txt',
                'ycv': foldName + '_Ytrain.txt',
                'ytest': foldName + '_Ytest.txt',
                'database': 'Virus'
            })
def saveXYForKFold(K=5, path=''):
    """Write per-fold test and merged-training X/Y files, then one settings file.

    For each fold index ``i`` in ``1..K`` the test files
    ``<path>Fold<i>X_test`` / ``<path>Fold<i>Y_test.txt`` are built from
    ``<path>Fold<i>/dataFold<i>``, and the training files
    ``<path>Fold<i>X_train`` / ``<path>Fold<i>Y_train.txt`` are built by
    feeding every other fold's data file into the same pair of open handles.
    The values returned by ``constructNetFromJson`` are discarded here.

    Afterwards ``<path>../settings_Virus.txt`` is written with zero instance
    counts and ``FoldK_*`` file names.

    ``numOut`` and ``paramFile`` are read from the enclosing module scope.

    NOTE(review): this definition reuses the name of an earlier
    ``saveXYForKFold(K, datapath, xypath)`` in this file and therefore
    replaces it once the module is loaded.
    NOTE(review): the files written above are named ``Fold<i>X_test`` etc.,
    whereas the settings refer to ``FoldK_Xtest`` etc.; the settings values
    look like placeholders -- confirm with whatever consumes the file.

    :param K: number of folds; folds are numbered from 1 to K inclusive.
    :param path: prefix prepended to every input and output file name.
    """
    for idx in range(1, K + 1):
        foldName = 'Fold' + str(idx)

        # write X, Y test from current fold (not reassign label)
        with open(path + foldName + 'X_test', 'wb') as f_x, \
                open(path + foldName + 'Y_test.txt', 'w') as f_y:
            constructNetFromJson(
                path + foldName + '/dataFold' + str(idx), f_x=f_x, f_y=f_y)

        # Merge other folds, write X, Y for training (not reassign label).
        # 'with' releases both handles even if a construction step raises.
        with open(path + foldName + 'X_train', 'wb') as f_x, \
                open(path + foldName + 'Y_train.txt', 'w') as f_y:
            for idx_train in range(1, K + 1):
                if idx_train == idx:
                    continue
                constructNetFromJson(
                    path + 'Fold' + str(idx_train)
                    + '/dataFold' + str(idx_train),
                    f_x=f_x, f_y=f_y)

    # NOTE(review): the settings are written once, after all folds; nothing
    # in them depends on the fold index.  Confirm this placement against the
    # original layout.
    print('setting parameters:')
    commonFunctions.generateSettingContent(
        path + '../settings_' + 'Virus' + '.txt',
        {
            'numtrain': 0,
            'numcv': 0,
            'numtest': 0,
            'output': numOut,
            'paramFile': paramFile,
            'xtrain': 'FoldK_Xtrain',
            'xcv': 'FoldK_Xtrain',
            'xtest': 'FoldK_Xtest',
            'ytrain': 'FoldK_Ytrain.txt',
            'ycv': 'FoldK_Ytrain.txt',
            'ytest': 'FoldK_Ytest.txt',
            'database': 'Virus'
        })
xname[fold] = xfile yfile = dataname + '_' + netstructure + '_Y' + fold + '.txt' yname[fold] = yfile numInst[fold] = writeXY(jsonAST, common_params.xypath + xfile, common_params.xypath + yfile) commonFunctions.generateSettingContent( common_params.xypath + '../settings_' + dataname + '.txt', { 'numtrain': numInst['train'], 'numcv': numInst['CV'], 'numtest': numInst['test'], 'output': gl.numOut, 'paramFile': NetStruct.paramsFile, 'xtrain': xname['train'], 'xcv': xname['CV'], 'xtest': xname['test'], 'ytrain': yname['train'], 'ycv': yname['CV'], 'ytest': yname['test'], 'database': dataname }) # config_Paths() # # jsondir = common_params.jsondir # netstructure=NetStruct.netstructure #'tbcnn' # numInst ={} # xname ={} # yname ={}