def main():
    """Generate one R script that trains and predicts for every sub experiment.

    Command-line driven (see the argparse options below). The generated script
    is written next to the experiment folder given by ``-e`` and is named
    ``trainPredict<algo>-td.<day>-dt.<n>-pd.<day>-wt.<type>...-For<sub>SubE.r``.

    Steps, in the order the R code is emitted:
      1. read targets, weight vector and training features from ``-e``/design.ini;
      2. read the prediction features (feature keys renamed with a ``P`` suffix);
      3. for every design file returned by ``utility.list_files(-s)``, emit
         training code (unless the model file exists and ``-skipM`` is yes) and
         prediction code (unless the prediction file exists and ``-skipP`` is
         not "no").

    The script is finally made executable with ``chmod +x``.
    """
    # Local import: only needed for the final chmod, keeps the block self-contained.
    import subprocess

    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True, help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-s', required=True, help='Location of the folder containing all the sub experiments')
    parser.add_argument('-targetClass', required=True, help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM', required=False, help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-skipP', required=False, help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-pd', required=True, help='Prediction directory')
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    # Both skip flags default to "yes": existing outputs are not regenerated.
    if args.skipM is None:
        args.skipM = "yes"
    if args.skipP is None:
        args.skipP = "yes"

    # Single-argument print(...) behaves identically under the Python 2 print statement.
    print("Using the experiment folder " + args.e)
    print("Training files steps")
    config = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    predictionDataDirectoryName = args.pd.replace('/ro/', '/wf/')
    predictionDataDirectoryName = predictionDataDirectoryName + "/p/" + os.path.basename(os.path.dirname(args.e))
    if not os.path.exists(predictionDataDirectoryName):
        # makedirs (not mkdir) so a missing intermediate "p" directory is created too.
        os.makedirs(predictionDataDirectoryName)

    dirName = os.path.dirname(args.e)
    algo = rCodeGen.getAlgoName(args)
    args.s = args.s + "/"
    trainingDay = os.path.basename(os.path.abspath(args.td))
    rProgName = ("trainPredict" + algo + "-td." + trainingDay + "-dt." + args.dt
                 + "-pd." + os.path.basename(os.path.abspath(args.pd))
                 + "-wt." + args.wt + attribute.generateExtension()
                 + "-For" + os.path.basename(os.path.dirname(args.s)) + "SubE.r")
    rProgLocation = dirName + '/' + rProgName

    def renameFeatureKeysForPrediction(designConfig, target):
        """Rename every key of the 'features-<target>' section to '<key>P' in place."""
        features = designConfig['features-' + target]
        # Snapshot the keys first: the section is mutated while renaming.
        for key in list(features.keys()):
            features[key + "P"] = features[key]
            del features[key]

    skipExistingModel = args.skipM.lower() == "yes"
    forcePrediction = args.skipP.lower() == "no"

    # 'with' guarantees the script file is closed even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if algo == 'glmnet':
            rScript.write('require (glmnet) \n')
        elif algo == 'randomForest':
            rScript.write('require (randomForest) \n')
        elif algo == 'mda':
            rScript.write('require (mda) \n')

        rCodeGen.ForSetUpChecksForTrainPredictTogather(rScript)
        rCodeGen.ToReadTargetFile(rScript, config)
        rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript, config, target, 2)
            rCodeGen.ForSanityChecks(rScript, config, target)

        print("For prediction data set")
        configForPredictions = ConfigObj(args.e + "/design.ini")
        print("The config parameters that I am working with are")
        # NOTE(review): the original formatting was lost; the per-target nesting of
        # the three statements after the rename is reconstructed to mirror the
        # training section above - confirm against the original script.
        for target in configForPredictions['target']:
            renameFeatureKeysForPrediction(configForPredictions, target)
            print(configForPredictions)
            rCodeGen.ToReadFeatureFiles(rScript, configForPredictions, target, 4)
            rCodeGen.ForSanityChecks(rScript, configForPredictions, target)

        designFiles = utility.list_files(args.s)
        for designFile in designFiles:
            print("Generating r code for " + designFile)
            rScript.write('\n\nprint ("Running r code for ' + designFile + '")\n')
            config = ConfigObj(designFile)
            configForPredictions = ConfigObj(designFile)
            designDir = os.path.dirname(designFile)

            for target in config['target']:
                renameFeatureKeysForPrediction(configForPredictions, target)

                # -------------- MODEL --------------------
                lModelGeneratedAfterTraining = (
                    designDir + '/' + algo + target + '-td.' + trainingDay
                    + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                    + "-wt." + args.wt + attribute.generateExtension() + '.model')
                if os.path.isfile(lModelGeneratedAfterTraining) and skipExistingModel:
                    print("Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                else:
                    rCodeGen.ToCreateDataFrameForTraining(rScript, config, target)
                    rCodeGen.ForTraining(rScript, args, config, target)
                    rCodeGen.saveTrainingModel(rScript, args, designDir, target)

                # -------------- Prediction Part --------------------
                predictionFileName = (
                    predictionDataDirectoryName + "/" + args.a + target + '-td.' + trainingDay
                    + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                    + '-f.' + os.path.basename(designDir)
                    + "-wt." + args.wt + attribute.generateExtension() + ".predictions")
                if not os.path.isfile(predictionFileName) or forcePrediction:
                    rCodeGen.ForPredictions(rScript, configForPredictions, args, designFile, target, 4)
                else:
                    print("Prediction File " + predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")

        # Clear the R workspace at the very end of the generated script.
        rScript.write('rm(list=ls())')

    print("Finished generating R training program: " + rProgLocation)
    # Argument list instead of a shell string: safe for paths with spaces/metacharacters.
    subprocess.call(["chmod", "+x", rProgLocation])
def main():
    """Generate the R script that trains rpart trees for one experiment folder.

    Reads ``<-e>/design1.ini`` (targets/features) and ``<-e>/design.ini``
    (passed to ``rCodeGen.ToReadPredictionFiles``), then writes
    ``traintree-td.<day>-tTD<n>-dt.<n>-wt.<type>....r`` next to the experiment
    folder. Training code for a target is emitted only when its ``.tree`` file
    is missing or ``-skipT`` is not "yes"; if every tree file is already
    present nothing beyond the set-up checks is written.

    The script is finally made executable with ``chmod +x``.
    """
    # Local import: only needed for the final chmod, keeps the block self-contained.
    import subprocess

    parser = argparse.ArgumentParser(
        description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,
                        help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-targetClass', required=True,
                        help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipT', required=False,
                        help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True,
                        help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-treeType', required=False, help="Tree read for trade engine")
    parser.add_argument('-tTD', required=False, help="Tree number of days to be used")
    args = parser.parse_args()

    if args.treeType is None:
        # -treeType is concatenated into every .tree file name below; omitting it
        # used to surface as a TypeError on the first target.
        parser.error("-treeType is required to name the generated .tree file")

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipT is None:
        args.skipT = "yes"
    if args.tTD is None:
        # Tree day count defaults to the training day count.
        args.tTD = args.dt

    # Single-argument print(...) behaves identically under the Python 2 print statement.
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.e + "/design1.ini")
    # The "/" separator was missing here, unlike every other config path in these
    # scripts; the path then only resolved when -e carried a trailing slash.
    configInit = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"
    algo = rCodeGen.getAlgoName(args)
    trainingDay = os.path.basename(os.path.abspath(args.td))
    rProgName = ("traintree" + "-td." + trainingDay + "-tTD" + args.tTD
                 + "-dt." + args.dt + "-wt." + args.wt
                 + attribute.generateExtension() + ".r")
    rProgLocation = dirName + '/' + rProgName
    skipExisting = args.skipT.lower() == "yes"

    def treeFileName(target):
        """Return the path of the .tree file produced for ``target``."""
        return (dirName + "/" + algo + target + '-td.' + trainingDay
                + "-tTD" + args.tTD + '-dt.' + args.dt
                + attribute.generateExtension() + ".tree" + args.treeType)

    # 'with' guarantees the script file is closed even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        rScript.write('require (rpart) \n')
        rCodeGen.ForSetUpChecks(rScript)

        # Stops at the first target whose tree must be (re)generated.
        lAllFilePresent = all(
            os.path.isfile(treeFileName(target)) and skipExisting
            for target in config['target'])

        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript, config)
            rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ToReadPredictionFiles(rScript, config, target, configInit)
                rCodeGen.ForSanityChecks(rScript, config, target)
                lTreeFileName = treeFileName(target)
                if os.path.isfile(lTreeFileName) and skipExisting:
                    print("Model File " + lTreeFileName + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipT=No")
                else:
                    rCodeGen.ToRenameDataBeforeTraining(rScript, config, target)
                    rCodeGen.ForTrainingTree(rScript, args, config, target, args.treeType)
                    print(lTreeFileName)
                    rCodeGen.saveTrainingTree(rScript, args, dirName, target, lTreeFileName)

    print("Finished generating R training program: " + rProgLocation)
    # Argument list instead of a shell string: safe for paths with spaces/metacharacters.
    subprocess.call(["chmod", "+x", rProgLocation])
def main():
    """Generate the R training script for one experiment folder.

    Reads ``<-e>/design.ini`` and writes ``train<algo>-td.<day>-dt.<n>-wt.<type>...``
    (suffix ``.r``, or ``double.r`` when ``-double`` is given) next to the
    experiment folder. Training code for a target is emitted only when its
    model file is missing or ``-skipM`` is not "yes"; if every model file is
    already present nothing beyond the set-up checks is written.

    With ``-double`` the model file is ``...double.model``. When the plain
    ``.model`` file already exists, code to load it is emitted instead of code
    to train it, followed by the double-training weight preparation and the
    save of the "double" model.

    The script is finally made executable with ``chmod +x``.
    """
    # Local import: only needed for the final chmod, keeps the block self-contained.
    import subprocess

    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True, help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-targetClass', required=True, help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM', required=False, help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-double', required=False, help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipM is None:
        args.skipM = "yes"

    # Single-argument print(...) behaves identically under the Python 2 print statement.
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"
    algo = rCodeGen.getAlgoName(args)
    trainingDay = os.path.basename(os.path.abspath(args.td))
    rProgName = ("train" + algo + "-td." + trainingDay + "-dt." + args.dt
                 + "-wt." + args.wt + attribute.generateExtension()
                 + ("double.r" if args.double else ".r"))
    rProgLocation = dirName + '/' + rProgName
    skipExisting = args.skipM.lower() == "yes"
    # Suffix of the model this run is expected to produce.
    modelSuffix = 'double.model' if args.double else '.model'

    def modelFileName(target, suffix):
        """Return the model path for ``target`` ending in ``suffix``."""
        return (dirName + '/' + algo + target + '-td.' + trainingDay
                + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                + "-wt." + args.wt + attribute.generateExtension() + suffix)

    # 'with' guarantees the script file is closed even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if algo == 'glmnet':
            rScript.write('require (glmnet) \n')
        elif algo == 'randomForest':
            rScript.write('require (randomForest) \n')
        elif algo == 'mda':
            rScript.write('require (mda) \n')
        rCodeGen.ForSetUpChecks(rScript)

        # Stops at the first target whose model must be (re)generated.
        lAllFilePresent = all(
            os.path.isfile(modelFileName(target, modelSuffix)) and skipExisting
            for target in config['target'])

        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript, config)
            rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ForSanityChecks(rScript, config, target)

                lModelGeneratedAfterTraining = modelFileName(target, modelSuffix)
                if os.path.isfile(lModelGeneratedAfterTraining) and skipExisting:
                    # The message used to repeat "So it will not be formed again ." twice.
                    print("Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                    continue

                rCodeGen.ToCreateDataFrameForTraining(rScript, config, target)
                if args.double:
                    # Reuse the single-pass model when it is already on disk,
                    # otherwise emit the first training pass.
                    lTempModelName = modelFileName(target, '.model')
                    if os.path.isfile(lTempModelName):
                        rCodeGen.ForLoadingModel(rScript, args, dirName, target, config)
                    else:
                        rCodeGen.ForTraining(rScript, args, config, target)
                    rCodeGen.forPreparingWtVectorForDoubleTraining(rScript, args, target)
                    rCodeGen.saveTrainingModel(rScript, args, dirName, target, "double")
                else:
                    rCodeGen.ForTraining(rScript, args, config, target)
                    rCodeGen.saveTrainingModel(rScript, args, dirName, target)

    print("Finished generating R training program: " + rProgLocation)
    # Argument list instead of a shell string: safe for paths with spaces/metacharacters.
    subprocess.call(["chmod", "+x", rProgLocation])
def main():
    """Generate the R prediction script for one experiment (or sub experiment).

    Reads ``<-s>/design.ini`` (``-s`` defaults to ``-e``) and writes
    ``predict<algo>-td.<day>-dt.<n>-pd.<day>-wt.<type>...`` (suffix ``.r``, or
    ``double.r`` when ``-double`` is given) into the directory of ``-s``.
    Prediction files live under ``<-pd with /ro/ replaced by /wf/>/p/<experiment>/``.

    Prediction code for a target is emitted only when its prediction file is
    missing or ``-skipP`` is "no"; if every prediction file is present and
    ``-skipP`` is "yes", nothing beyond the set-up checks is written.

    The script is finally made executable with ``chmod +x``.
    """
    # Local import: only needed for the final chmod, keeps the block self-contained.
    import subprocess

    parser = argparse.ArgumentParser(
        description='Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/')
    parser.add_argument('-e', required=True, help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-pd', required=True, help='Prediction directory')
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="exp/default")
    parser.add_argument('-targetClass', required=True,
                        help="For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipP', required=False,
                        help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-s', required=False, help="Experiment sub folders")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-double', required=False, help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipP is None:
        args.skipP = "yes"
    if args.s is None:
        args.s = args.e

    # Single-argument print(...) behaves identically under the Python 2 print statement.
    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.s + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.s)
    # -a is a required option, so the former "default to glmnet" branch could never run.
    algo = args.a
    trainingDay = os.path.basename(os.path.abspath(args.td))
    doubleTag = "double" if args.double else ""
    rProgName = ("predict" + algo + "-td." + trainingDay + "-dt." + args.dt
                 + "-pd." + os.path.basename(os.path.abspath(args.pd))
                 + "-wt." + args.wt + attribute.generateExtension()
                 + doubleTag + ".r")
    rProgLocation = dirName + '/' + rProgName

    predictDataDirectoryName = args.pd.replace('/ro/', '/wf/')
    predictDataDirectoryName = (predictDataDirectoryName + "/p/"
                                + os.path.basename(os.path.dirname(args.e)) + "/")
    # Created before the script is opened so a failure here leaves no stub script;
    # makedirs (not mkdir) also creates a missing intermediate "p" directory.
    if not os.path.exists(predictDataDirectoryName):
        os.makedirs(predictDataDirectoryName)

    # The two skip tests are intentionally kept distinct, as in the original:
    # the "all present" scan requires skipP == "yes", the per-target test only
    # forces regeneration when skipP == "no".
    skipExisting = args.skipP.lower() == "yes"
    forcePrediction = args.skipP.lower() == "no"

    def predictionFileNameFor(target):
        """Return the path of the .predictions file written for ``target``."""
        return (predictDataDirectoryName + "/" + algo + target + '-td.' + trainingDay
                + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                + '-f.' + os.path.basename(os.path.dirname(args.s))
                + "-wt." + args.wt + attribute.generateExtension()
                + doubleTag + ".predictions")

    # 'with' guarantees the script file is closed even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if algo == 'glmnet':
            rScript.write('require (glmnet) \n')
        elif algo == 'randomForest':
            rScript.write('require (randomForest) \n')
        rCodeGen.ForSetUpChecks(rScript)

        # Stops at the first target whose predictions must be (re)generated.
        lAllFilePresent = all(
            os.path.isfile(predictionFileNameFor(target)) and skipExisting
            for target in config['target'])

        if not lAllFilePresent:
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ForSanityChecks(rScript, config, target)
                predictionFileName = predictionFileNameFor(target)
                if not os.path.isfile(predictionFileName) or forcePrediction:
                    # Informational only. The extension is included so the printed
                    # name matches the model name used by the training generator.
                    lModelGeneratedAfterTraining = (
                        args.s + '/' + algo + target + '-td.' + trainingDay
                        + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                        + "-wt." + args.wt + attribute.generateExtension()
                        + doubleTag + '.model')
                    if args.double:
                        rCodeGen.ForPredictions(rScript, config, args, args.s, target, 2, "double")
                    else:
                        rCodeGen.ForPredictions(rScript, config, args, args.s, target)
                    print(lModelGeneratedAfterTraining)
                else:
                    print(predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")

    print("Finished generating R prediction program: " + rProgLocation)
    # Argument list instead of a shell string: safe for paths with spaces/metacharacters.
    subprocess.call(["chmod", "+x", rProgLocation])
def main():
    """Generate one R prediction script covering every sub experiment.

    Reads ``<-e>/design.ini`` to emit the feature-reading code once, then walks
    the design files returned by ``utility.list_files(-s)`` and, for each
    target of each design, emits prediction code unless the prediction file
    already exists and ``-skipP`` is not "no".

    The script is written next to the experiment folder as
    ``predict<algo>-td.<day>-dt.<n>-pd.<day>-wt.<type>...-For<sub>SubE.r`` and is
    finally made executable with ``chmod +x``.
    """
    # Local import: only needed for the final chmod, keeps the block self-contained.
    import subprocess

    parser = argparse.ArgumentParser(
        description='Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/')
    parser.add_argument('-e', required=True, help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-s', required=True,
                        help='Location of the subfolder that contains the sub experiments')
    parser.add_argument('-pd', required=True, help='Prediction directory')
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-targetClass', required=True,
                        help="For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipP', required=False,
                        help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    # -wt is concatenated into every generated file name, so omitting it always
    # crashed with a TypeError; it is now required, as in the sibling generators.
    parser.add_argument('-wt', required=True,
                        help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipP is None:
        args.skipP = "yes"

    # Single-argument print(...) behaves identically under the Python 2 print statement.
    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e)
    # -a is a required option, so the former "default to glmnet" branch could never run.
    algo = args.a
    args.s = args.s + "/"

    predictDataDirectoryName = args.pd.replace('/ro/', '/wf/')
    predictDataDirectoryName = (predictDataDirectoryName + "/p/"
                                + os.path.basename(os.path.dirname(args.e)))
    if not os.path.exists(predictDataDirectoryName):
        # makedirs (not mkdir) so a missing intermediate "p" directory is created too.
        os.makedirs(predictDataDirectoryName)

    trainingDay = os.path.basename(os.path.abspath(args.td))
    rProgName = ("predict" + algo + "-td." + trainingDay + "-dt." + args.dt
                 + "-pd." + os.path.basename(os.path.abspath(args.pd))
                 + "-wt." + args.wt + attribute.generateExtension()
                 + "-For" + os.path.basename(os.path.dirname(args.s)) + "SubE.r")
    rProgLocation = dirName + '/' + rProgName
    forcePrediction = args.skipP.lower() == "no"

    # 'with' guarantees the script file is closed even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if algo == 'glmnet':
            rScript.write('require (glmnet) \n')
        elif algo == 'randomForest':
            rScript.write('require (randomForest) \n')
        rCodeGen.ForSetUpChecks(rScript)

        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript, config, target)
            rCodeGen.ForSanityChecks(rScript, config, target)

        designFiles = utility.list_files(args.s)
        for designFile in designFiles:
            print("Generating r code for " + designFile)
            # A space now separates "for" from the file name in the emitted R message.
            rScript.write('\n\nprint ("Running r code for ' + designFile + '")\n')
            config = ConfigObj(designFile)
            for target in config['target']:
                predictionFileName = (
                    predictDataDirectoryName + "/" + algo + target + '-td.' + trainingDay
                    + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                    + '-f.' + os.path.basename(os.path.dirname(designFile))
                    + "-wt." + args.wt + attribute.generateExtension() + ".predictions")
                if not os.path.isfile(predictionFileName) or forcePrediction:
                    rCodeGen.ForPredictions(rScript, config, args, designFile, target)
                else:
                    print(predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")

        # Clear the R workspace at the very end of the generated script.
        rScript.write('rm(list=ls())')

    print("Finished generating R prediction program: " + rProgLocation)
    # Argument list instead of a shell string: safe for paths with spaces/metacharacters.
    subprocess.call(["chmod", "+x", rProgLocation])