def main():
    """Generate one correlation R script per training-day directory.

    Parses the command line, loads ``<e>/design.ini`` and, for every
    directory returned by ``attribute.getListOfTrainingDirectoriesNames``,
    writes an Rscript file into the experiment's directory and marks it
    executable. The R code itself is emitted by ``rCodeGen``; each target in
    the config is passed along with the name of the ``.coef`` output file.
    """
    # Local import: the module header is not visible from here.
    import subprocess

    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is :- src/corrRGenForE.py -e ob/e/nsefut/CorExpHINDALCO/ -td ob/data/ro/nsefut/20141017/ -dt 10 -iT HINDALCO -sP -1 -oT 0')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)

    print("Using the experiment folder " + args.e)
    print(args.e + "/design.ini")
    config = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"
    trainDayName = os.path.basename(os.path.abspath(args.td))
    trainingDaysDirectory = attribute.getListOfTrainingDirectoriesNames(int(args.dt), args.td, args.iT)

    for l_trainingday in trainingDaysDirectory:
        dayName = os.path.basename(os.path.abspath(l_trainingday))
        rProgName = "corr-date-" + dayName + "-td." + trainDayName + "-dt." + args.dt + attribute.generateExtension() + ".r"
        rProgLocation = dirName + '/' + rProgName

        # The context manager closes the script even if code generation fails.
        with open(rProgLocation, 'w') as rScript:
            rScript.write('#!/usr/bin/Rscript \n')
            rCodeGen.ForSetUpChecks(rScript)
            lCorrelationFileName = dirName + '/correlation-coef-date-' + dayName + '-td.' + trainDayName + '-dt.' + args.dt + attribute.generateExtension() + ".coef"
            rCodeGen.ToReadTargetFile(rScript, config)
            for target in config['target']:
                rCodeGen.ToFindCorrelationDatewiseAndPrintingToFile(rScript, config, target, lCorrelationFileName)

        # Argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed to chmod unchanged.
        subprocess.call(["chmod", "+x", rProgLocation])
def main():
    """Build and run the train / predict command lists for an experiment.

    When ``-mpMearge`` is "yes" a single combined train+predict command list
    is run. Otherwise the training list is run first and the prediction list
    is built and run only after training has finished. Each list is executed
    through ``wrapper``, in a local process pool when ``args.sequence`` is
    ``'lp'`` and serially otherwise.
    """
    global args
    args = parseCommandLine()
    experimentFolder = args.e
    trainDataFolder = args.td
    predictDataFolder = args.pd
    attribute.initializeInstDetails(args.iT, args.sP, args.oT)

    def _runCommands(commandList):
        # Run wrapper over every command; returns the list of results.
        if args.sequence == 'lp':  # local parallel mode
            pool = multiprocessing.Pool()  # defaults to one worker per CPU
            try:
                return pool.map(wrapper, commandList)
            finally:
                # Release the worker processes instead of leaking one pool
                # per command list.
                pool.close()
                pool.join()
        # Explicit list so the commands run eagerly on any Python version.
        return [wrapper(command) for command in commandList]

    if args.mpMearge.lower() == "yes":
        _runCommands(getTrainPredictCommandList(experimentFolder, args.a, trainDataFolder,
                                                predictDataFolder, args.dt, args.wt))
    else:
        _runCommands(getTrainCommandList(experimentFolder, args.a, trainDataFolder,
                                         args.dt, args.wt))
        # Built only after training completed, as in the original ordering.
        _runCommands(getPredictCommandList(experimentFolder, args.a, predictDataFolder,
                                           trainDataFolder, args.dt, args.wt))
def main():
    """Combine two attributes with an operand and write the result.

    If the output attribute file already exists nothing is recomputed: the
    binary companion file is either reported (when present) or produced from
    the existing file, and the process exits with status 0. Otherwise the
    attribute is computed, written and converted to binary. Any failure
    prints a traceback and exits the process with status -1.
    """
    try:
        attribute.initializeInstDetails(args.iT, args.sP, args.oT)
        outputFileName = attribute.getFileNameFromOperationCommand(
            args.a1, args.a2, args.operand, args.d)

        if os.path.isfile(outputFileName):
            print("%s %s" % ("The attribute has already been generated. If you want to re-generate it then first delete the attribute file.", outputFileName))
            lNameAfterDecimal = outputFileName.split(".")[-1]
            # Swap only the trailing extension. str.replace() would also
            # rewrite any earlier occurrence of the same text in the path.
            attributeBinaryFileName = outputFileName[:len(outputFileName) - len(lNameAfterDecimal)] + "bin"
            if os.path.isfile(attributeBinaryFileName):
                print(attributeBinaryFileName)
            else:
                attribute.callRProgramToConvertToBinary(outputFileName)
            # os._exit() does not flush stdio buffers, so flush explicitly.
            sys.stdout.flush()
            os._exit(0)  # We do not take it as a error condition hence return 0 and not -1

        attribute.aList, lListOfHeaderColNames = attribute.operateOnAttributes(
            args.a1, args.a2, args.operand, args.d)
        attribute.writeToFile(outputFileName, lListOfHeaderColNames)
        attribute.callRProgramToConvertToBinary(outputFileName)
    except:
        # Top-level boundary: report everything and signal failure to the caller.
        traceback.print_exc()
        e = sys.exc_info()[0]
        print(e)
        sys.stdout.flush()
        os._exit(-1)
def main():
    """Run the feature-generation command lists for an experiment.

    The three stages (inside features, intermediate features, remaining
    commands) are run for ``<e>/design.ini`` and then, on a best-effort
    basis, for ``<e>/design1.ini``.

    Returns:
        The result of the final ``utility.runCommandList`` call for
        ``design1.ini``, or ``None`` if that second pass raised.
    """
    args = parseCommandLine()
    dataFolder = args.d
    generatorsFolder = args.g
    attribute.initializeInstDetails(args.iT, args.sP, args.oT, args.rev)
    args.sequence = "lp"

    def _runAllStages(designFile):
        # Run the three command lists for one design file, in order.
        for buildCommandList in (getCommandListForInsideFeatures,
                                 getCommandListForIntermediateFeatures):
            stageCommands = buildCommandList(designFile, dataFolder,
                                             generatorsFolder, args.tickSize)
            utility.runCommandList(stageCommands, args)
        commandList = getCommandList(designFile, dataFolder,
                                     generatorsFolder, args.tickSize)
        return utility.runCommandList(commandList, args)

    _runAllStages(args.e + "/design.ini")

    try:
        return _runAllStages(args.e + "/design1.ini")
    except Exception:
        # The second design stays best-effort, but the reason for skipping
        # it is reported instead of being silently discarded.
        import traceback
        traceback.print_exc()
        return
def main():
    """Generate one attribute file from a data file and convert it to binary.

    Loads the data matrix (selected data when ``-rev`` is "yes"; otherwise
    with ``level=5``, retrying with ``level=6`` for generators whose name
    contains "For6Levels"), lets ``userModule`` extract the attribute, then
    writes the output file and converts it to binary. Any failure prints a
    traceback and exits the process with status -1.
    """
    def _columnArgument():
        # -i takes precedence; -c is used when -i was not supplied.
        if args.i is not None:
            return args.i
        return args.c

    try:
        attribute.initializeInstDetails(args.iT, args.sP, args.oT, args.rev)
        attribute.checkIfAttributeOutputFileExists(
            os.path.basename(moduleName), args.n, _columnArgument(),
            args.o, args.m, args.d)

        if args.rev is not None and args.rev.lower() == "yes":
            dataFile.getSelectedDataIntoMatrix(args.d)
        else:
            isSynthetic = (args.cType == "synthetic")

            def _loadMatrix(level):
                # Synthetic instruments additionally pass the -c argument.
                if isSynthetic:
                    dataFile.getDataIntoMatrix(args.d, args.c, level=level)
                else:
                    dataFile.getDataIntoMatrix(args.d, level=level)

            if "For6Levels" not in args.g:
                _loadMatrix(5)
            else:
                try:
                    _loadMatrix(5)
                except:
                    # Deliberate fallback: retry with level=6 when the
                    # level=5 load fails for a "For6Levels" generator.
                    _loadMatrix(6)

        attribute.initList()
        lHeaderColumnNamesList = userModule.extractAttributeFromDataMatrix(args)
        fileName = attribute.getOutputFileNameFromGeneratorName(
            os.path.basename(moduleName), args.n, _columnArgument(),
            args.o, args.m, args.d)
        print(fileName)
        attribute.writeToFile(fileName, lHeaderColumnNamesList)
        attribute.callRProgramToConvertToBinary(fileName)
    except:
        # Top-level boundary: report everything and signal failure.
        traceback.print_exc()
        e = sys.exc_info()[0]
        print(e)
        # os._exit() does not flush stdio buffers, so flush explicitly.
        sys.stdout.flush()
        os._exit(-1)
def main():
    """Build the command list for an experiment and run it.

    Forces ``args.sequence`` to "lp" before handing the commands to
    ``utility.runCommandList`` and returns whatever that call returns.
    """
    args = parseCommandLine()
    experimentFolder, dataFolder, generatorsFolder = args.e, args.d, args.g
    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    args.sequence = "lp"

    # The inside-feature and intermediate-feature passes are disabled in
    # this script; only the main command list is generated and run.
    return utility.runCommandList(
        getCommandList(experimentFolder, dataFolder, generatorsFolder, args.tickSize),
        args)
def main():
    """Generate and execute the command list of one experiment.

    ``args.sequence`` is set to "lp" before the commands are passed to
    ``utility.runCommandList``; that call's result is returned unchanged.
    """
    args = parseCommandLine()
    experimentFolder, dataFolder, generatorsFolder = args.e, args.d, args.g
    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    args.sequence = "lp"

    # Only the main command list is run here; the inside-feature and
    # intermediate-feature passes are switched off in this script.
    pendingCommands = getCommandList(experimentFolder, dataFolder,
                                     generatorsFolder, args.tickSize)
    return utility.runCommandList(pendingCommands, args)
def main():
    """Build and run the train / predict command lists for an experiment.

    When ``-mpMearge`` is "yes" a single combined train+predict command list
    is run. Otherwise the training list is run first and the prediction list
    is built and run only after training has finished. Each list is executed
    through ``wrapper``, in a local process pool when ``args.sequence`` is
    ``'lp'`` and serially otherwise.
    """
    global args
    args = parseCommandLine()
    experimentFolder = args.e
    trainDataFolder = args.td
    predictDataFolder = args.pd
    attribute.initializeInstDetails(args.iT, args.sP, args.oT)

    def _runCommands(commandList):
        # Run wrapper over every command; returns the list of results.
        if args.sequence == 'lp':  # local parallel mode
            pool = multiprocessing.Pool()  # defaults to one worker per CPU
            try:
                return pool.map(wrapper, commandList)
            finally:
                # Release the worker processes instead of leaking one pool
                # per command list.
                pool.close()
                pool.join()
        # Explicit list so the commands run eagerly on any Python version.
        return [wrapper(command) for command in commandList]

    if args.mpMearge.lower() == "yes":
        _runCommands(getTrainPredictCommandList(experimentFolder, args.a, trainDataFolder,
                                                predictDataFolder, args.dt, args.wt))
    else:
        _runCommands(getTrainCommandList(experimentFolder, args.a, trainDataFolder,
                                         args.dt, args.wt))
        # Built only after training completed, as in the original ordering.
        _runCommands(getPredictCommandList(experimentFolder, args.a, predictDataFolder,
                                           trainDataFolder, args.dt, args.wt))
def main():
    """Generate one attribute file from a data file and convert it to binary.

    Loads the data matrix (selected data when ``-rev`` is "yes"; otherwise
    with ``level=5``, retrying with ``level=6`` for generators whose name
    contains "For6Levels"), lets ``userModule`` extract the attribute, then
    writes the output file and converts it to binary. Any failure prints a
    traceback and exits the process with status -1.
    """
    def _columnArgument():
        # -i takes precedence; -c is used when -i was not supplied.
        if args.i is not None:
            return args.i
        return args.c

    try:
        attribute.initializeInstDetails(args.iT, args.sP, args.oT, args.rev)
        attribute.checkIfAttributeOutputFileExists(
            os.path.basename(moduleName), args.n, _columnArgument(),
            args.o, args.m, args.d)

        if args.rev is not None and args.rev.lower() == "yes":
            dataFile.getSelectedDataIntoMatrix(args.d)
        else:
            isSynthetic = (args.cType == "synthetic")

            def _loadMatrix(level):
                # Synthetic instruments additionally pass the -c argument.
                if isSynthetic:
                    dataFile.getDataIntoMatrix(args.d, args.c, level=level)
                else:
                    dataFile.getDataIntoMatrix(args.d, level=level)

            if "For6Levels" not in args.g:
                _loadMatrix(5)
            else:
                try:
                    _loadMatrix(5)
                except:
                    # Deliberate fallback: retry with level=6 when the
                    # level=5 load fails for a "For6Levels" generator.
                    _loadMatrix(6)

        attribute.initList()
        lHeaderColumnNamesList = userModule.extractAttributeFromDataMatrix(args)
        fileName = attribute.getOutputFileNameFromGeneratorName(
            os.path.basename(moduleName), args.n, _columnArgument(),
            args.o, args.m, args.d)
        print(fileName)
        attribute.writeToFile(fileName, lHeaderColumnNamesList)
        attribute.callRProgramToConvertToBinary(fileName)
    except:
        # Top-level boundary: report everything and signal failure.
        traceback.print_exc()
        e = sys.exc_info()[0]
        print(e)
        # os._exit() does not flush stdio buffers, so flush explicitly.
        sys.stdout.flush()
        os._exit(-1)
def main():
    """Write a copy of the data file without consecutive duplicate rows.

    Two rows count as duplicates when their columns before
    ``colNumberOfData.TTQ`` are identical; only the first row of each run of
    duplicates is written. Progress is printed every 1000 rows.
    """
    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    dataFile.getDataIntoMatrix(args.d)
    attribute.checkIfNewDataFileExists(args.d)
    newFileName = dataFile.getNewDataFileName(args.d)
    header = "Instrument;AskQ0;AskP0;AskQ1;AskP1;AskQ2;AskP2;AskQ3;AskP3;AskQ4;AskP4;BidQ0;BidP0;BidQ1;BidP1;BidQ2;BidP2;BidQ3;BidP3;BidQ4;BidP4;TTQ;LTP;LTQ;LTT;ATP;TBQ;TSQ;CP;OP;HP;LP;TimeStamp;SerialNo;MsgCode;OrderType;Quantity1;Price1;Quantity2;Price2;ExchangeTS;BestBidQ;BestBidP;BestAskQ;BestAskP\n"
    print(newFileName)

    # The context manager guarantees the output is flushed and closed, even
    # if a row fails to serialise part-way through.
    with open(newFileName, "w") as newFileObject:
        newFileObject.write(header)
        # None (not "") as the sentinel so that a first row whose key is an
        # empty string is still written.
        previousLine = None
        for currentRowCount, dataRow in enumerate(dataFile.matrix, 1):
            lineWithoutBestPrices = ";".join(dataRow[:colNumberOfData.TTQ])
            if previousLine != lineWithoutBestPrices:
                newFileObject.write(";".join(dataRow) + "\n")
                previousLine = lineWithoutBestPrices
            if currentRowCount % 1000 == 0:
                print("Processed row number " + str(currentRowCount))
def main():
    """Combine two attributes with an operand and write the result.

    If the output attribute file already exists nothing is recomputed: the
    binary companion file is either reported (when present) or produced from
    the existing file, and the process exits with status 0. Otherwise the
    attribute is computed, written and converted to binary. Any failure
    prints a traceback and exits the process with status -1.
    """
    try:
        attribute.initializeInstDetails(args.iT, args.sP, args.oT)
        outputFileName = attribute.getFileNameFromOperationCommand(
            args.a1, args.a2, args.operand, args.d)

        if os.path.isfile(outputFileName):
            print("%s %s" % ("The attribute has already been generated. If you want to re-generate it then first delete the attribute file.", outputFileName))
            lNameAfterDecimal = outputFileName.split(".")[-1]
            # Swap only the trailing extension. str.replace() would also
            # rewrite any earlier occurrence of the same text in the path.
            attributeBinaryFileName = outputFileName[:len(outputFileName) - len(lNameAfterDecimal)] + "bin"
            if os.path.isfile(attributeBinaryFileName):
                print(attributeBinaryFileName)
            else:
                attribute.callRProgramToConvertToBinary(outputFileName)
            # os._exit() does not flush stdio buffers, so flush explicitly.
            sys.stdout.flush()
            os._exit(0)  # We do not take it as a error condition hence return 0 and not -1

        attribute.aList, lListOfHeaderColNames = attribute.operateOnAttributes(
            args.a1, args.a2, args.operand, args.d)
        attribute.writeToFile(outputFileName, lListOfHeaderColNames)
        attribute.callRProgramToConvertToBinary(outputFileName)
    except:
        # Top-level boundary: report everything and signal failure to the caller.
        traceback.print_exc()
        e = sys.exc_info()[0]
        print(e)
        sys.stdout.flush()
        os._exit(-1)
def main():
    """Run the feature-generation command lists for an experiment.

    The three stages (inside features, intermediate features, remaining
    commands) are run for ``<e>/design.ini`` and then, on a best-effort
    basis, for ``<e>/design1.ini``.

    Returns:
        The result of the final ``utility.runCommandList`` call for
        ``design1.ini``, or ``None`` if that second pass raised.
    """
    args = parseCommandLine()
    dataFolder = args.d
    generatorsFolder = args.g
    attribute.initializeInstDetails(args.iT, args.sP, args.oT, args.rev)
    args.sequence = "lp"

    def _runAllStages(designFile):
        # Run the three command lists for one design file, in order.
        for buildCommandList in (getCommandListForInsideFeatures,
                                 getCommandListForIntermediateFeatures):
            stageCommands = buildCommandList(designFile, dataFolder,
                                             generatorsFolder, args.tickSize)
            utility.runCommandList(stageCommands, args)
        commandList = getCommandList(designFile, dataFolder,
                                     generatorsFolder, args.tickSize)
        return utility.runCommandList(commandList, args)

    _runAllStages(args.e + "/design.ini")

    try:
        return _runAllStages(args.e + "/design1.ini")
    except Exception:
        # The second design stays best-effort, but the reason for skipping
        # it is reported instead of being silently discarded.
        import traceback
        traceback.print_exc()
        return
def main():
    """Generate the R prediction script for one experiment.

    Parses the command line, loads ``<s>/design.ini`` (``-s`` defaults to
    ``-e``) and writes an executable Rscript into the directory of ``-s``.
    Prediction code is emitted through ``rCodeGen`` only for targets whose
    ``.predictions`` file is missing, or for all targets when ``-skipP`` is
    "no". With ``-double`` set, the "double" file names and the
    double-training form of ``rCodeGen.ForPredictions`` are used.
    """
    # Local import: the module header is not visible from here.
    import subprocess

    parser = argparse.ArgumentParser(description='Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/')
    parser.add_argument('-e', required=True,help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-pd', required=True,help='Prediction directory')
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="exp/default")
    parser.add_argument('-targetClass',required=True,help="For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipP',required=False,help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-s',required=False,help="Experiment sub folders")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    parser.add_argument('-double',required=False,help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipP is None:
        args.skipP = "yes"
    if args.s is None:
        args.s = args.e

    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.s + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.s)
    # -a is a required option, so this fallback is defensive only.
    algo = 'glmnet' if args.a is None else args.a
    trainDayName = os.path.basename(os.path.abspath(args.td))
    predictDayName = os.path.basename(os.path.abspath(args.pd))

    rProgName = ("predict" + algo + "-td." + trainDayName + "-dt." + args.dt
                 + "-pd." + predictDayName + "-wt." + args.wt
                 + attribute.generateExtension()
                 + ("double.r" if args.double else ".r"))
    rProgLocation = dirName + '/' + rProgName

    def _predictionFileNameFor(target):
        # Path of the .predictions file the generated script produces for target.
        return (predictDataDirectoryName + "/" + args.a + target + '-td.' + trainDayName
                + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                + '-f.' + os.path.basename(os.path.dirname(args.s))
                + "-wt." + args.wt + attribute.generateExtension()
                + ("double.predictions" if args.double else ".predictions"))

    # The context manager closes the script even if code generation fails.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')

        predictDataDirectoryName = args.pd.replace('/ro/', '/wf/')
        predictDataDirectoryName = predictDataDirectoryName + "/p/" + os.path.basename(os.path.dirname(args.e)) + "/"
        if not os.path.exists(predictDataDirectoryName):
            # makedirs also creates the intermediate "/p/" directory, which
            # os.mkdir would fail on when it does not exist yet.
            os.makedirs(predictDataDirectoryName)

        if args.a == 'glmnet':
            rScript.write('require (glmnet) \n')
        elif args.a == 'randomForest':
            rScript.write('require (randomForest) \n')
        rCodeGen.ForSetUpChecks(rScript)

        # True only if every target already has its prediction file and
        # skipping is enabled; stops at the first target that needs work.
        lAllFilePresent = all(
            os.path.isfile(_predictionFileNameFor(target)) and args.skipP.lower() == "yes"
            for target in config['target'])

        if not lAllFilePresent:
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ForSanityChecks(rScript, config, target)
                predictionFileName = _predictionFileNameFor(target)
                if not os.path.isfile(predictionFileName) or args.skipP.lower() == "no":
                    lModelGeneratedAfterTraining = (
                        args.s + '/' + args.a + target + '-td.' + trainDayName
                        + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                        + "-wt." + args.wt
                        + ('double.model' if args.double else '.model'))
                    if args.double:
                        rCodeGen.ForPredictions(rScript, config, args, args.s, target, 2, "double")
                    else:
                        rCodeGen.ForPredictions(rScript, config, args, args.s, target)
                    print(lModelGeneratedAfterTraining)
                else:
                    print(predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")

    print("Finished generating R prediction program: " + rProgLocation)
    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to chmod unchanged.
    subprocess.call(["chmod", "+x", rProgLocation])
def main():
    """Generate a combined train-and-predict R script for all sub experiments.

    Loads ``<e>/design.ini`` to emit the shared feature-reading code (once
    with the original feature keys, once with every key suffixed with "P"
    for the prediction data set), then walks every design file returned by
    ``utility.list_files(<s>/)`` and emits training code and prediction code
    per target unless the corresponding model / prediction file already
    exists and ``-skipM`` / ``-skipP`` allow skipping.
    """
    # Local import: the module header is not visible from here.
    import subprocess

    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-s', required=True,help='Location of the folder containing all the sub experiments')
    parser.add_argument('-targetClass',required=True,help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM',required=False,help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-skipP',required=False,help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-pd', required=True,help='Prediction directory')
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipM is None:
        args.skipM = "yes"
    if args.skipP is None:
        args.skipP = "yes"

    print("Using the experiment folder " + args.e)
    print("Training files steps")
    config = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    predictionDataDirectoryName = args.pd.replace('/ro/', '/wf/')
    predictionDataDirectoryName = predictionDataDirectoryName + "/p/" + os.path.basename(os.path.dirname(args.e))
    if not os.path.exists(predictionDataDirectoryName):
        # makedirs also creates the intermediate "/p/" directory, which
        # os.mkdir would fail on when it does not exist yet.
        os.makedirs(predictionDataDirectoryName)

    dirName = os.path.dirname(args.e)
    algo = rCodeGen.getAlgoName(args)
    args.s = args.s + "/"
    trainDayName = os.path.basename(os.path.abspath(args.td))

    rProgName = ("trainPredict" + algo + "-td." + trainDayName + "-dt." + args.dt
                 + "-pd." + os.path.basename(os.path.abspath(args.pd))
                 + "-wt." + args.wt + attribute.generateExtension()
                 + "-For" + os.path.basename(os.path.dirname(args.s)) + "SubE.r")
    rProgLocation = dirName + '/' + rProgName

    def _suffixFeatureKeys(predictionConfig, target):
        # Rename every key of the target's feature section to key + "P".
        features = predictionConfig['features-' + target]
        # Snapshot the keys: the section is modified while renaming.
        for key in list(features.keys()):
            features[key + "P"] = features[key]
            del features[key]

    # The context manager closes the script even if code generation fails.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if algo == 'glmnet':
            rScript.write('require (glmnet) \n')
        elif algo == 'randomForest':
            rScript.write('require (randomForest) \n')
        elif algo == 'mda':
            rScript.write('require (mda) \n')

        rCodeGen.ForSetUpChecksForTrainPredictTogather(rScript)
        rCodeGen.ToReadTargetFile(rScript, config)
        rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript, config, target, 2)
            rCodeGen.ForSanityChecks(rScript, config, target)

        print("For prediction data set")
        configForPredictions = ConfigObj(args.e + "/design.ini")
        print("The config parameters that I am working with are")
        for target in configForPredictions['target']:
            _suffixFeatureKeys(configForPredictions, target)
            print(configForPredictions)
            rCodeGen.ToReadFeatureFiles(rScript, configForPredictions, target, 4)
            rCodeGen.ForSanityChecks(rScript, configForPredictions, target)

        designFiles = utility.list_files(args.s)
        for designFile in designFiles:
            print("Generating r code for " + designFile)
            rScript.write('\n\nprint ("Running r code for ' + designFile + '")\n')
            config = ConfigObj(designFile)
            configForPredictions = ConfigObj(designFile)
            for target in config['target']:
                # -------------- model --------------
                _suffixFeatureKeys(configForPredictions, target)
                lModelGeneratedAfterTraining = (
                    os.path.dirname(designFile) + '/' + algo + target + '-td.' + trainDayName
                    + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                    + "-wt." + args.wt + attribute.generateExtension() + '.model')
                if os.path.isfile(lModelGeneratedAfterTraining) and args.skipM.lower() == "yes":
                    print("Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                else:
                    rCodeGen.ToCreateDataFrameForTraining(rScript, config, target)
                    rCodeGen.ForTraining(rScript, args, config, target)
                    rCodeGen.saveTrainingModel(rScript, args, os.path.dirname(designFile), target)

                # -------------- prediction --------------
                predictionFileName = (
                    predictionDataDirectoryName + "/" + args.a + target + '-td.' + trainDayName
                    + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                    + '-f.' + os.path.basename(os.path.dirname(designFile))
                    + "-wt." + args.wt + attribute.generateExtension() + ".predictions")
                if not os.path.isfile(predictionFileName) or args.skipP.lower() == "no":
                    rCodeGen.ForPredictions(rScript, configForPredictions, args, designFile, target, 4)
                else:
                    print("Prediction File " + predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")

        rScript.write('rm(list=ls())')

    print("Finished generating R training program: " + rProgLocation)
    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to chmod unchanged.
    subprocess.call(["chmod", "+x", rProgLocation])
def main():
    """Generate the R training script covering all sub experiments.

    Loads ``<e>/design.ini`` to emit the shared target/feature-reading code,
    then walks every design file returned by ``utility.list_files(<s>/)``
    and emits training code per target unless the model file already exists
    and ``-skipM`` is "yes". The script is written into the directory of
    ``-e`` and marked executable.
    """
    # Local import: the module header is not visible from here.
    import subprocess

    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-s', required=True,help='Location of the folder containing all the sub experiments')
    parser.add_argument('-targetClass',required=True,help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM',required=False,help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipM is None:
        args.skipM = "yes"

    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e)
    algo = rCodeGen.getAlgoName(args)
    args.s = args.s + "/"
    trainDayName = os.path.basename(os.path.abspath(args.td))

    rProgName = ("train" + algo + "-td." + trainDayName + "-dt." + args.dt
                 + "-wt." + args.wt + attribute.generateExtension()
                 + "-For" + os.path.basename(os.path.dirname(args.s)) + "SubE.r")
    rProgLocation = dirName + '/' + rProgName

    # The context manager closes the script even if code generation fails.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if algo == 'glmnet':
            rScript.write('require (glmnet) \n')
        elif algo == 'randomForest':
            rScript.write('require (randomForest) \n')
        elif algo == 'mda':
            rScript.write('require (mda) \n')

        rCodeGen.ForSetUpChecks(rScript)
        rCodeGen.ToReadTargetFile(rScript, config)
        rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript, config, target)
            rCodeGen.ForSanityChecks(rScript, config, target)

        designFiles = utility.list_files(args.s)
        for designFile in designFiles:
            print("Generating r code for " + designFile)
            rScript.write('\n\nprint ("Running r code for ' + designFile + '")\n')
            config = ConfigObj(designFile)
            for target in config['target']:
                lModelGeneratedAfterTraining = (
                    os.path.dirname(designFile) + '/' + algo + target + '-td.' + trainDayName
                    + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                    + "-wt." + args.wt + attribute.generateExtension() + '.model')
                if os.path.isfile(lModelGeneratedAfterTraining) and args.skipM.lower() == "yes":
                    print("Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                else:
                    rCodeGen.ToCreateDataFrameForTraining(rScript, config, target)
                    rCodeGen.ForTraining(rScript, args, config, target)
                    rCodeGen.saveTrainingModel(rScript, args, os.path.dirname(designFile), target)

        rScript.write('rm(list=ls())')

    print("Finished generating R training program: " + rProgLocation)
    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to chmod unchanged.
    subprocess.call(["chmod", "+x", rProgLocation])
def main():
    """Generate the R script that trains the trees for one experiment.

    Loads ``<e>/design1.ini`` (targets and features) and ``<e>/design.ini``
    (passed to ``rCodeGen.ToReadPredictionFiles``), then writes an
    executable Rscript next to the experiment folder. Training code is
    emitted only for targets whose ``.tree`` file is missing, or for all
    targets when ``-skipT`` is "no".
    """
    # Local import: the module header is not visible from here.
    import subprocess

    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-targetClass',required=True,help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipT',required=False,help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    parser.add_argument('-treeType',required=False,help="Tree read for trade engine")
    parser.add_argument('-tTD',required=False,help="Tree number of days to be used")
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipT is None:
        args.skipT = "yes"
    if args.tTD is None:
        args.tTD = args.dt

    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.e + "/design1.ini")
    # The separator was missing here, so design.ini was only found when -e
    # happened to end with "/"; now built the same way as design1.ini.
    configInit = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"
    algo = rCodeGen.getAlgoName(args)
    trainDayName = os.path.basename(os.path.abspath(args.td))

    rProgName = ("traintree" + "-td." + trainDayName + "-tTD" + args.tTD
                 + "-dt." + args.dt + "-wt." + args.wt
                 + attribute.generateExtension() + ".r")
    rProgLocation = dirName + '/' + rProgName

    def _treeFileNameFor(target):
        # Path of the .tree file produced for target.
        # NOTE(review): -treeType is optional but concatenated here, so
        # omitting it raises TypeError -- confirm whether it should be required.
        return (dirName + "/" + algo + target + '-td.' + trainDayName
                + "-tTD" + args.tTD + '-dt.' + args.dt
                + attribute.generateExtension() + ".tree" + args.treeType)

    # The context manager closes the script even if code generation fails.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        rScript.write('require (rpart) \n')
        rCodeGen.ForSetUpChecks(rScript)

        # True only if every target already has its tree file and skipping
        # is enabled; stops at the first target that needs work.
        lAllFilePresent = all(
            os.path.isfile(_treeFileNameFor(target)) and args.skipT.lower() == "yes"
            for target in config['target'])

        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript, config)
            rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ToReadPredictionFiles(rScript, config, target, configInit)
                rCodeGen.ForSanityChecks(rScript, config, target)
                lTreeFileName = _treeFileNameFor(target)
                if os.path.isfile(lTreeFileName) and args.skipT.lower() == "yes":
                    print("Model File " + lTreeFileName + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipT=No")
                else:
                    rCodeGen.ToRenameDataBeforeTraining(rScript, config, target)
                    rCodeGen.ForTrainingTree(rScript, args, config, target, args.treeType)
                    print(lTreeFileName)
                    rCodeGen.saveTrainingTree(rScript, args, dirName, target, lTreeFileName)

    print("Finished generating R training program: " + rProgLocation)
    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to chmod unchanged.
    subprocess.call(["chmod", "+x", rProgLocation])
def main():
    """Generate the R script that trains one model per target of an experiment.

    Reads ``design.ini`` from the experiment folder ``-e`` and writes
    ``train<algo>-td.<td>-dt.<dt>-wt.<wt><extension>.r`` (``...double.r`` when
    ``-double`` is given) into the directory part of ``-e``, then makes it
    executable.

    Training code for a target is emitted only when its model file is missing
    or ``-skipM`` is not "yes".  With ``-double`` the script loads the
    single-trained ``.model`` file when it already exists (otherwise it trains
    it), prepares the weight vector for double training and saves the result
    as ``double.model``.
    """
    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True, help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-targetClass', required=True, help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM', required=False, help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-double', required=False, help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipM is None:
        args.skipM = "yes"

    # NOTE: single-argument print(...) behaves exactly like the print statement.
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"
    algo = rCodeGen.getAlgoName(args)
    trainDayName = os.path.basename(os.path.abspath(args.td))
    skipExisting = args.skipM.lower() == "yes"
    # Any non-empty -double value switches double training on.
    modelSuffix = 'double.model' if args.double else '.model'
    scriptSuffix = "double.r" if args.double else ".r"

    def modelFileFor(target, suffix):
        # Path of the model file of `target`; `suffix` selects the plain or the
        # double-trained variant.
        return (dirName + '/' + algo + target + '-td.' + trainDayName + '-dt.' + args.dt
                + '-targetClass.' + args.targetClass + "-wt." + args.wt
                + attribute.generateExtension() + suffix)

    rProgName = ("train" + algo + "-td." + trainDayName + "-dt." + args.dt + "-wt." + args.wt
                 + attribute.generateExtension() + scriptSuffix)
    rProgLocation = dirName + '/' + rProgName

    # The with-block guarantees the script file is closed even when one of the
    # rCodeGen generators raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if algo in ('glmnet', 'randomForest', 'mda'):
            rScript.write('require (' + algo + ') \n')
        rCodeGen.ForSetUpChecks(rScript)

        # True only when every target may be skipped (also true without targets).
        allModelsPresent = all(
            skipExisting and os.path.isfile(modelFileFor(target, modelSuffix))
            for target in config['target'])

        if not allModelsPresent:
            rCodeGen.ToReadTargetFile(rScript, config)
            rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ForSanityChecks(rScript, config, target)
                lModelGeneratedAfterTraining = modelFileFor(target, modelSuffix)
                if skipExisting and os.path.isfile(lModelGeneratedAfterTraining):
                    # The original message repeated "So it will not be formed again ." twice.
                    print("Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                    continue

                rCodeGen.ToCreateDataFrameForTraining(rScript, config, target)
                if args.double:
                    # Reuse the single-trained model when it is already on disk.
                    if os.path.isfile(modelFileFor(target, '.model')):
                        rCodeGen.ForLoadingModel(rScript, args, dirName, target, config)
                    else:
                        rCodeGen.ForTraining(rScript, args, config, target)
                    rCodeGen.forPreparingWtVectorForDoubleTraining(rScript, args, target)
                    rCodeGen.saveTrainingModel(rScript, args, dirName, target, "double")
                else:
                    rCodeGen.ForTraining(rScript, args, config, target)
                    rCodeGen.saveTrainingModel(rScript, args, dirName, target)

    print("Finished generating R training program: " + rProgLocation)
    os.system("chmod +x " + rProgLocation)
import rCodeGen, utility
import attribute
import aGenForE

# Command-line driver: builds one "removeDuplicatesFromOrderBook.py" command per
# training-day directory and hands the commands to utility.runCommandList in
# chunks of -nComputers commands.
parser = argparse.ArgumentParser(description='This program will get results for all the subexperiments. \n', formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('-td', required=True, help='Training directory')
parser.add_argument('-dt', required=True, help='Number of days after start training day specified . Defaults to 1 ')
parser.add_argument('-run', required=True, help='dry (only show dont execute) or real (show and execute)')
parser.add_argument('-sequence', required=True, help='lp (Local parallel) / dp (Distributed parallel) / serial')
parser.add_argument('-nComputers', required=True, help="Number of computers at which task has to be run present in the data set")
parser.add_argument('-iT', required=False, help='Instrument name')
parser.add_argument('-sP', required=False, help='Strike price of instrument')
parser.add_argument('-oT', required=False, help='Options Type')
args = parser.parse_args()

# Chunk size for the command batches.  A value below 1 used to crash range()
# (0) or silently run nothing (negative), so reject it with a usage error.
chunkSize = int(args.nComputers)
if chunkSize < 1:
    parser.error("-nComputers must be a positive integer")

attribute.initializeInstDetails(args.iT, args.sP, args.oT)

# dp is only needed (and only imported) for the distributed-parallel mode.
useDistributed = args.sequence == "dp"
if useDistributed:
    import dp

allDataDirectories = attribute.getListOfTrainingDirectoriesNames(int(args.dt), args.td, args.iT)

# Example of one generated command:
# src/removeDuplicatesFromOrderBook.py -d ob/data/ro/nsefut/20141126/ -iT SBIN -oT 0 -sP -1
commandList = [
    ["removeDuplicatesFromOrderBook.py", "-d", directory, "-iT", args.iT, "-oT", args.oT, "-sP", args.sP]
    for directory in allDataDirectories
]

for chunkStart in range(0, len(commandList), chunkSize):
    lSubGenList = commandList[chunkStart:chunkStart + chunkSize]
    utility.runCommandList(lSubGenList, args)
    if useDistributed:
        # Previously called unconditionally, which raised NameError for the
        # lp/serial modes because dp had never been imported.
        print(dp.printGroupStatus())
def main():
    """Generate the R script that writes the predictions of one experiment.

    Reads ``design.ini`` from ``-s`` (defaults to ``-e``) and writes
    ``predict<algo>-td.<td>-dt.<dt>-pd.<pd>-wt.<wt><extension>.r``
    (``...double.r`` with ``-double``) into the directory part of ``-s``, then
    makes it executable.  Prediction files live under the ``-pd`` path with
    ``/ro/`` replaced by ``/wf/``, in ``p/<experiment name>/``; that directory
    is created when missing.

    Prediction code for a target is emitted only when its prediction file is
    missing or ``-skipP`` is "no"; if every prediction file exists and
    ``-skipP`` is "yes" the script contains only the header and set-up checks.
    """
    parser = argparse.ArgumentParser(
        description='Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/')
    parser.add_argument('-e', required=True, help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-pd', required=True, help='Prediction directory')
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="exp/default")
    parser.add_argument('-targetClass', required=True, help="For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipP', required=False, help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-s', required=False, help="Experiment sub folders")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-double', required=False, help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipP is None:
        args.skipP = "yes"
    if args.s is None:
        args.s = args.e

    # NOTE: single-argument print(...) behaves exactly like the print statement.
    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.s + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.s)
    # -a is a required option, so the former "glmnet" fallback for a missing
    # algorithm could never trigger.
    algo = args.a
    trainDayName = os.path.basename(os.path.abspath(args.td))
    predictDayName = os.path.basename(os.path.abspath(args.pd))
    subFolderName = os.path.basename(os.path.dirname(args.s))
    skipFlag = args.skipP.lower()
    # Any non-empty -double value switches the double-trained variant on.
    scriptSuffix = "double.r" if args.double else ".r"
    predictionSuffix = "double.predictions" if args.double else ".predictions"
    modelSuffix = 'double.model' if args.double else '.model'

    rProgName = ("predict" + algo + "-td." + trainDayName + "-dt." + args.dt + "-pd." + predictDayName
                 + "-wt." + args.wt + attribute.generateExtension() + scriptSuffix)
    rProgLocation = dirName + '/' + rProgName

    predictDataDirectoryName = args.pd.replace('/ro/', '/wf/')
    predictDataDirectoryName = predictDataDirectoryName + "/p/" + os.path.basename(os.path.dirname(args.e)) + "/"

    def predictionFileFor(target):
        # Path of the prediction file that the generated script writes for `target`.
        return (predictDataDirectoryName + "/" + args.a + target + '-td.' + trainDayName
                + '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + subFolderName
                + "-wt." + args.wt + attribute.generateExtension() + predictionSuffix)

    # The with-block guarantees the script file is closed even when mkdir or
    # one of the rCodeGen generators raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if not os.path.exists(predictDataDirectoryName):
            os.mkdir(predictDataDirectoryName)
        if args.a in ('glmnet', 'randomForest'):
            rScript.write('require (' + args.a + ') \n')
        rCodeGen.ForSetUpChecks(rScript)

        # True only when every target may be skipped (also true without targets).
        allPredictionsPresent = all(
            os.path.isfile(predictionFileFor(target)) and skipFlag == "yes"
            for target in config['target'])

        if not allPredictionsPresent:
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ForSanityChecks(rScript, config, target)
                predictionFileName = predictionFileFor(target)
                if not os.path.isfile(predictionFileName) or skipFlag == "no":
                    # Only printed for information; rCodeGen.ForPredictions
                    # receives the folder and target, not this string.
                    lModelGeneratedAfterTraining = (
                        args.s + '/' + args.a + target + '-td.' + trainDayName + '-dt.' + args.dt
                        + '-targetClass.' + args.targetClass + "-wt." + args.wt + modelSuffix)
                    if args.double:
                        rCodeGen.ForPredictions(rScript, config, args, args.s, target, 2, "double")
                    else:
                        rCodeGen.ForPredictions(rScript, config, args, args.s, target)
                    print(lModelGeneratedAfterTraining)
                else:
                    print(predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")

    print("Finished generating R prediction program: " + rProgLocation)
    os.system("chmod +x " + rProgLocation)
def main():
    """Generate the R script that trains the trees of one experiment.

    The experiment folder given by ``-e`` must contain ``design1.ini`` (its
    ``target`` section drives the loops below) and ``design.ini`` (handed to
    ``rCodeGen.ToReadPredictionFiles``).  The generated program is written
    into the directory part of ``-e`` as
    ``traintree-td.<td>-tTD<tTD>-dt.<dt>-wt.<wt><extension>.r`` and is then
    made executable.

    Training code for a target is emitted only when its ``.tree`` file is
    missing or ``-skipT`` is not "yes"; if every tree file already exists the
    script contains nothing but the header and the set-up checks.
    """
    parser = argparse.ArgumentParser(
        description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument(
        '-e', required=True,
        help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument(
        '-targetClass', required=True,
        help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument(
        '-skipT', required=False,
        help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument(
        '-wt', required=True,
        help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-treeType', required=False, help="Tree read for trade engine")
    parser.add_argument('-tTD', required=False, help="Tree number of days to be used")
    args = parser.parse_args()

    if args.treeType is None:
        # args.treeType is appended to every tree file name, so a missing value
        # used to surface as a TypeError only after the script file had been
        # opened.  Report it as a usage error instead.
        parser.error("-treeType must be given: it is the suffix of every .tree file name")

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipT is None:
        args.skipT = "yes"
    if args.tTD is None:
        args.tTD = args.dt

    # NOTE: single-argument print(...) behaves exactly like the print statement.
    print("Using the experiment folder " + args.e)
    # os.path.join works with and without a trailing slash on -e; "design.ini"
    # used to be appended without any separator.
    config = ConfigObj(os.path.join(args.e, "design1.ini"))
    configInit = ConfigObj(os.path.join(args.e, "design.ini"))
    # configInitList = []
    # for iniFile in os.listdir(args.e + "/"):
    #     if '.ini' in iniFile and iniFile != 'design.ini':
    #         index = iniFile[ file.index(".") - 1 ]
    #         configInitList.append( ( index, ConfigObj(args.e+"/"+iniFile) ) )
    # configInit = dict(configInitList)
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"
    algo = rCodeGen.getAlgoName(args)
    trainDayName = os.path.basename(os.path.abspath(args.td))
    skipExisting = args.skipT.lower() == "yes"

    def treeFileFor(target):
        # Path of the tree file belonging to `target`; the same string is passed
        # on to rCodeGen.saveTrainingTree, so its spelling must stay as it is.
        pieces = [dirName, "/", algo, target, '-td.', trainDayName, "-tTD", args.tTD,
                  '-dt.', args.dt, attribute.generateExtension(), ".tree", args.treeType]
        return "".join(pieces)

    rProgName = "".join(["traintree", "-td.", trainDayName, "-tTD", args.tTD, "-dt.", args.dt,
                         "-wt.", args.wt, attribute.generateExtension(), ".r"])
    rProgLocation = dirName + '/' + rProgName

    # Context manager closes the script file even if a generator call fails.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        rScript.write('require (rpart) \n')
        rCodeGen.ForSetUpChecks(rScript)

        # Stays True when there are no targets, exactly like the old flag loop.
        lAllFilePresent = all(
            skipExisting and os.path.isfile(treeFileFor(target))
            for target in config['target'])

        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript, config)
            rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ToReadPredictionFiles(rScript, config, target, configInit)
                rCodeGen.ForSanityChecks(rScript, config, target)
                lTreeFileName = treeFileFor(target)
                if skipExisting and os.path.isfile(lTreeFileName):
                    print("Model File " + lTreeFileName + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipT=No")
                    continue
                rCodeGen.ToRenameDataBeforeTraining(rScript, config, target)
                rCodeGen.ForTrainingTree(rScript, args, config, target, args.treeType)
                print(lTreeFileName)
                rCodeGen.saveTrainingTree(rScript, args, dirName, target, lTreeFileName)

    print("Finished generating R training program: " + rProgLocation)
    os.system("chmod +x " + rProgLocation)
def main():
    """Generate one R prediction script covering every sub-experiment of ``-s``.

    Reads ``design.ini`` from ``-e`` to emit the feature-reading and sanity
    check code once, then walks the design files returned by
    ``utility.list_files(-s + "/")`` and emits prediction code per design file
    and target.  The program is written into the directory part of ``-e`` as
    ``predict<algo>-td.<td>-dt.<dt>-pd.<pd>-wt.<wt><extension>-For<s>SubE.r``
    and made executable.  Prediction files live under the ``-pd`` path with
    ``/ro/`` replaced by ``/wf/``, in ``p/<experiment name>``; that directory
    is created when missing.

    Prediction code for a target is skipped when its prediction file already
    exists, unless ``-skipP`` is "no".
    """
    parser = argparse.ArgumentParser(
        description='Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/')
    parser.add_argument('-e', required=True, help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-s', required=True, help='Location of the subfolder that contains the sub experiments')
    parser.add_argument('-pd', required=True, help='Prediction directory')
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-targetClass', required=True, help="For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipP', required=False, help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-wt', required=False, help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    args = parser.parse_args()

    if args.wt is None:
        # -wt is declared optional but is concatenated into the script name and
        # every prediction file name; omitting it used to raise a TypeError
        # after the prediction directory had already been created.
        parser.error("-wt must be given: it is part of the generated file names")

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipP is None:
        args.skipP = "yes"

    # NOTE: single-argument print(...) behaves exactly like the print statement.
    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e)
    # -a is a required option, so the former "glmnet" fallback for a missing
    # algorithm could never trigger.
    algo = args.a
    # args is later handed to rCodeGen.ForPredictions, so the trailing slash is
    # stored back on args.s just as before.
    args.s = args.s + "/"
    trainDayName = os.path.basename(os.path.abspath(args.td))
    predictDayName = os.path.basename(os.path.abspath(args.pd))
    regenerate = args.skipP.lower() == "no"

    predictDataDirectoryName = args.pd.replace('/ro/', '/wf/')
    predictDataDirectoryName = predictDataDirectoryName + "/p/" + os.path.basename(os.path.dirname(args.e))
    if not os.path.exists(predictDataDirectoryName):
        os.mkdir(predictDataDirectoryName)

    rProgName = ("predict" + algo + "-td." + trainDayName + "-dt." + args.dt + "-pd." + predictDayName
                 + "-wt." + args.wt + attribute.generateExtension()
                 + "-For" + os.path.basename(os.path.dirname(args.s)) + "SubE.r")
    rProgLocation = dirName + '/' + rProgName

    # The with-block guarantees the script file is closed even when one of the
    # generator calls raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if args.a in ('glmnet', 'randomForest'):
            rScript.write('require (' + args.a + ') \n')
        rCodeGen.ForSetUpChecks(rScript)

        # Feature reading and sanity checks come from the top-level design.
        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript, config, target)
            rCodeGen.ForSanityChecks(rScript, config, target)

        for designFile in utility.list_files(args.s):
            print("Generating r code for " + designFile)
            rScript.write('\n\nprint ("Running r code for' + designFile + '")\n')
            # From here on `config` is the design of the current sub-experiment.
            config = ConfigObj(designFile)
            designFolderName = os.path.basename(os.path.dirname(designFile))
            for target in config['target']:
                predictionFileName = (
                    predictDataDirectoryName + "/" + args.a + target + '-td.' + trainDayName
                    + '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + designFolderName
                    + "-wt." + args.wt + attribute.generateExtension() + ".predictions")
                if regenerate or not os.path.isfile(predictionFileName):
                    rCodeGen.ForPredictions(rScript, config, args, designFile, target)
                else:
                    print(predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")

        # Last statement of the generated script: clear the R workspace.
        rScript.write('rm(list=ls())')

    print("Finished generating R prediction program: " + rProgLocation)
    os.system("chmod +x " + rProgLocation)