def saveTrainingModel(rScript, args, path, pTargetVariableKey, pDouble="", treeOrNot="", treeFileName=""):
    """Emit the R code that saves the fitted model and appends its coefficients.

    Two file names are derived under ``path``: a ``.model`` file whose name
    includes ``pTargetVariableKey`` and a ``.coef`` file whose name does not.
    When ``pDouble`` is non-empty both endings are prefixed with ``double``.
    ``treeOrNot`` and ``treeFileName`` are accepted but not used here.

    The generated R saves ``fit``, reads its coefficients at ``lambda.min``
    and appends an intercept line plus a comma-separated coefficient line to
    the ``.coef`` file.
    """
    algo = getAlgoName(args)
    ending = '' if len(pDouble) == 0 else 'double'

    def _artifactName(prefix, extension):
        # Shared tail of both file names; only the prefix and extension vary.
        return (prefix + '-td.' + os.path.basename(os.path.abspath(args.td))
                + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                + "-wt." + args.wt + attribute.generateExtension() + extension)

    outputFileName = _artifactName(path + '/' + algo + pTargetVariableKey, ending + '.model')
    modelValueFileName = _artifactName(path + '/' + algo, ending + '.coef')

    emit = rScript.write
    emit('\nprint (paste("Section8: Saving the model in file ' + outputFileName + '")) \n')
    emit('save(fit, file = "' + outputFileName + '")\n')
    emit('l = coef(fit, s = "lambda.min")\n')
    emit('string_intercept = paste("' + pTargetVariableKey
         + '" , "-intercept-value = ",toString(l[1]),"\\n",sep="")\n')
    emit('string_intercept = paste(string_intercept,"vector-of-alphas-'
         + pTargetVariableKey + ' = ",sep="")\n')
    emit('for(i in 2:length(l)){\n')
    emit(' string_intercept = paste(string_intercept,l[i],",",sep="")\n')
    emit('}\n')
    emit('string_intercept = paste(string_intercept,"\\n",sep="")\n')
    emit('cat(string_intercept,file="' + modelValueFileName + '",sep="",append=TRUE)\n')
def ToReadTargetFile(rScript, config):
    """Emit R code that loads every target listed in ``config["target"]``.

    For each target key the generated R walks the ``;``-separated directory
    list found in ``args[2]``, loads ``<dir>/t/<file>.bin``, row-binds the
    per-directory data into a variable named after the target key and records
    each directory's row count in ``lengthOfEachDay``.
    """
    emit = rScript.write
    emit('print ("Section2: Read target files") \n')
    lTargetSet = config["target"]
    emit('lDirectorySet<-strsplit(args[2],";",fixed=TRUE,useBytes=FALSE)\n')
    for target in lTargetSet:
        # The R object stored in the .bin file is named like the config value
        # with the square brackets stripped.
        rObjectName = lTargetSet[target].replace('[', '').replace(']', '')
        fileToRead = lTargetSet[target] + attribute.generateExtension()
        loadStatement = ' load(paste(file,"/t/' + fileToRead + '.bin",sep=""))\n'
        dropStatement = ' rm(' + rObjectName + ')\n'

        emit('lengthOfEachDay = numeric()\n')
        emit('lFlag=FALSE\n')
        emit('for (file in lDirectorySet[[1]]){\n')
        # First directory: initialise the target variable.
        emit(' if (!lFlag){\n')
        emit(loadStatement)
        emit(' ' + target + '<- ' + rObjectName + skipRowCode + '\n')
        emit(dropStatement)
        emit(' lengthOfEachDay = c(lengthOfEachDay,nrow(' + target + '))\n')
        emit(' lFlag=TRUE\n')
        emit(' }\n')
        # Later directories: append to what is already loaded.
        emit(' else{\n')
        emit(loadStatement)
        emit(' temp<-' + rObjectName + skipRowCode + '\n')
        emit(dropStatement)
        emit(' lengthOfEachDay = c(lengthOfEachDay,nrow(temp))\n')
        emit(' ' + target + '<-rbind(' + target + ',temp)\n')
        emit(' rm(temp)\n')
        emit(' }\n')
        emit(' print ("Reading ' + fileToRead + '.target' + '") \n')
        emit('}\n')
def scriptWrapperForDayWiseCorrelation(pTrainingDay):
    """Run the per-day correlation R script for ``pTrainingDay``.

    The script path is built from the module-level ``l_exp_dir`` and ``args``
    and is handed to ``utility.runCommand`` together with ``-d pTrainingDay``.
    """
    lDate = os.path.basename(os.path.abspath(pTrainingDay))
    lTrainedOn = os.path.basename(os.path.abspath(args.td))
    lFileName = "".join([
        l_exp_dir, "/corr-date-", lDate,
        "-td.", lTrainedOn,
        "-dt.", args.dt,
        attribute.generateExtension(), ".r",
    ])
    lCommand = [lFileName, '-d', pTrainingDay]
    utility.runCommand(lCommand, args.run, args.sequence)
def main():
    """Generate the R script that computes feature/target correlations.

    Parses the command line, reads ``<-e>/design.ini`` and writes an
    executable ``corr-td.<day>-dt.<n><ext>.r`` next to the experiment folder.
    The script reads the targets once and then, for every target, appends the
    correlation of each of its features to a ``correlation-coef...coef`` file.
    """
    parser = argparse.ArgumentParser(
        description='Generates train.r. A sample command is :- src/corrRGenForE.py -e ob/e/nsefut/CorExpHINDALCO/ -td ob/data/ro/nsefut/20141017/ -dt 10 -iT HINDALCO -sP -1 -oT 0')
    parser.add_argument('-e', required=True,
                        help='Experiement folder to use to find the features and targets')
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    args = parser.parse_args()
    attribute.initializeInstDetails(args.iT, args.sP, args.oT)

    # Single-argument print(...) behaves the same as the print statement.
    print("Using the experiment folder " + args.e)
    print(args.e + "/design.ini")
    config = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"
    lTrainedOn = os.path.basename(os.path.abspath(args.td))
    rProgName = "corr-td." + lTrainedOn + "-dt." + args.dt + attribute.generateExtension() + ".r"
    rProgLocation = dirName + '/' + rProgName
    lCorrelationFileName = (dirName + '/correlation-coef' + '-td.' + lTrainedOn
                            + '-dt.' + args.dt + attribute.generateExtension() + ".coef")

    # The context manager closes the script even if a generator step raises,
    # so a partially written file is never left open.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        rCodeGen.ForSetUpChecks(rScript)
        rCodeGen.ToReadTargetFile(rScript, config)
        for target in config['target']:
            rCodeGen.ToFindCorrelationAndPrintingToFile(rScript, config, target,
                                                        lCorrelationFileName)

    print("Finished generating R training program: " + rProgLocation)
    os.system("chmod +x " + rProgLocation)
def getTargetValuesIntoDict(pTargetValuesDict):
    """Fill ``pTargetValuesDict`` with the buy and sell target values.

    Reads ``<dir>/t/<target><ext>.target`` for the ``buy`` and ``sell``
    entries of ``config["target"]`` (``<dir>`` is ``args.d`` with ``/ro/``
    replaced by ``/wf/``) through ``functionToReadTargetFileToDictionary``.
    For every key of the buy dictionary, ``pTargetValuesDict[key]`` becomes
    ``{'buy': ..., 'sell': ...}``.

    Raises:
        KeyError: if a key of the buy dictionary is missing from the sell
            dictionary.
    """
    dirName = args.d.replace('/ro/', '/wf/')
    # Inserting "/" makes this work for both args.e = "ob/e1/" and "ob/e1".
    config = ConfigObj(args.e + "/design.ini")
    target = config["target"]

    lValuesBySide = {}
    for side in ('buy', 'sell'):
        lSideValuesDict = dict()
        lFileName = dirName + "/t/" + target[side] + attribute.generateExtension() + ".target"
        print(side.capitalize() + " Target values file : " + lFileName)
        sys.stdout.flush()
        # Close the file as soon as it has been read instead of leaking it.
        with open(lFileName) as lTargetValuesFile:
            # Third argument: the target file starts with a header line.
            functionToReadTargetFileToDictionary(lTargetValuesFile, lSideValuesDict, True)
        print("Finished reading the " + side + " target values file")
        print("The number of elements in the " + side + " target values dictionary is : "
              + str(len(lSideValuesDict)))
        sys.stdout.flush()
        lValuesBySide[side] = lSideValuesDict

    # ----------------- Getting target values into dictionary -----------------
    lTargetBuyValuesDict = lValuesBySide['buy']
    lTargetSellValuesDict = lValuesBySide['sell']
    for elements in lTargetBuyValuesDict:
        pTargetValuesDict[elements] = {}
        pTargetValuesDict[elements]['buy'] = lTargetBuyValuesDict[elements]
        pTargetValuesDict[elements]['sell'] = lTargetSellValuesDict[elements]
def getPredictCommandList(experimentFolder,algoName,predictFolder,trainFolder,pNumberOfDays,pWtsTaken):
    """Return one ``[script, "-d", dataDir]`` command per matching predict script.

    Scripts are found by globbing ``experimentFolder`` for
    ``predict<algo>-td.<trainDay>-dt.<n>-pd.<predictDay>-wt.<wt><ext>-For*.r``;
    ``dataDir`` is ``predictFolder`` with ``/ro/`` replaced by ``/wf/``.
    """
    trainDay = os.path.basename(os.path.abspath(trainFolder))
    predictDay = os.path.basename(os.path.abspath(predictFolder))
    scriptPattern = (experimentFolder + "/predict" + algoName
                     + "-td." + trainDay
                     + "-dt." + pNumberOfDays
                     + "-pd." + predictDay
                     + "-wt." + pWtsTaken
                     + attribute.generateExtension() + "-For*.r")
    # lets make a list of all the scripts that need to be run
    predictScriptNames = glob.glob(scriptPattern)
    dirName = predictFolder.replace('/ro/', '/wf/')
    return [[scriptName, "-d", dirName] for scriptName in predictScriptNames]
def getPredictCommandList(experimentFolder, algoName, predictFolder, trainFolder, pNumberOfDays, pWtsTaken):
    """Build the command lines for every generated prediction script.

    Each command is ``[scriptPath, "-d", dataDir]`` where ``scriptPath``
    matches the predict-script naming pattern inside ``experimentFolder`` and
    ``dataDir`` is ``predictFolder`` with ``/ro/`` swapped for ``/wf/``.
    """
    patternParts = [
        experimentFolder, "/predict", algoName,
        "-td.", os.path.basename(os.path.abspath(trainFolder)),
        "-dt.", pNumberOfDays,
        "-pd.", os.path.basename(os.path.abspath(predictFolder)),
        "-wt.", pWtsTaken,
        attribute.generateExtension(), "-For*.r",
    ]
    matchingScripts = glob.glob("".join(patternParts))
    dataDir = predictFolder.replace('/ro/', '/wf/')

    commandList = []
    for scriptPath in matchingScripts:
        commandList.append([scriptPath, "-d", dataDir])
    return commandList
def getTrainCommandList(experimentFolder,algoName,trainFolder,pNumberOfDays,pWtsTaken):
    """Return one ``[script, "-d", dirs]`` command per matching train script.

    Scripts are found by globbing ``experimentFolder`` for
    ``train<algo>-td.<trainDay>-dt.<n>-wt.<wt><ext>-For*.r``. ``dirs`` is the
    ``;``-joined list returned by
    ``attribute.getListOfTrainingDirectoriesNames``.
    """
    trainDay = os.path.basename(os.path.abspath(trainFolder))
    scriptPattern = (experimentFolder + "/train" + algoName
                     + "-td." + trainDay
                     + "-dt." + pNumberOfDays
                     + "-wt." + pWtsTaken
                     + attribute.generateExtension() + "-For*.r")
    # lets make a list of all the scripts that need to be run
    trainScriptNames = glob.glob(scriptPattern)

    dirName = trainFolder.replace('/ro/', '/wf/')
    # NOTE(review): this reads the module-level args.dt / args.iT rather than
    # pNumberOfDays - confirm callers always pass args.dt as pNumberOfDays.
    trainingDataList = attribute.getListOfTrainingDirectoriesNames(args.dt, dirName, args.iT)
    trainingDataListString = ";".join(trainingDataList)
    return [[scriptName, "-d", trainingDataListString] for scriptName in trainScriptNames]
def saveTrainingModel(rScript, args, path, pTargetVariableKey, pDouble="", treeOrNot="", treeFileName=""):
    """Write the R statements that persist ``fit`` and dump its coefficients.

    The model goes to ``<path>/<algo><targetKey>-td...-wt...<ext>.model`` and
    the coefficients are appended to ``<path>/<algo>-td...-wt...<ext>.coef``;
    a non-empty ``pDouble`` changes the endings to ``double.model`` and
    ``double.coef``. ``treeOrNot`` and ``treeFileName`` are unused here.
    """
    algo = getAlgoName(args)
    if len(pDouble) == 0:
        modelEnding, coefEnding = '.model', '.coef'
    else:
        modelEnding, coefEnding = 'double.model', 'double.coef'

    outputFileName = "".join([
        path, '/', algo, pTargetVariableKey,
        '-td.', os.path.basename(os.path.abspath(args.td)),
        '-dt.', args.dt,
        '-targetClass.', args.targetClass,
        "-wt.", args.wt,
        attribute.generateExtension(), modelEnding,
    ])
    modelValueFileName = "".join([
        path, '/', algo,
        '-td.', os.path.basename(os.path.abspath(args.td)),
        '-dt.', args.dt,
        '-targetClass.', args.targetClass,
        "-wt.", args.wt,
        attribute.generateExtension(), coefEnding,
    ])

    rLines = [
        '\nprint (paste("Section8: Saving the model in file ' + outputFileName + '")) \n',
        'save(fit, file = "' + outputFileName + '")\n',
        'l = coef(fit, s = "lambda.min")\n',
        # First coefficient is reported as the intercept ...
        'string_intercept = paste("' + pTargetVariableKey
        + '" , "-intercept-value = ",toString(l[1]),"\\n",sep="")\n',
        # ... the remaining ones as a comma-separated vector.
        'string_intercept = paste(string_intercept,"vector-of-alphas-'
        + pTargetVariableKey + ' = ",sep="")\n',
        'for(i in 2:length(l)){\n',
        ' string_intercept = paste(string_intercept,l[i],",",sep="")\n',
        '}\n',
        'string_intercept = paste(string_intercept,"\\n",sep="")\n',
        'cat(string_intercept,file="' + modelValueFileName + '",sep="",append=TRUE)\n',
    ]
    for rLine in rLines:
        rScript.write(rLine)
def ToFindCorrelationAndPrintingToFile(rScript, config, pTargetVariableKey, pFileName):
    """Emit R code that appends each feature's correlation with a target.

    For every feature in ``config["features-<target>"]`` the generated R
    loads the feature from all directories in ``lDirectorySet``, computes
    ``cor`` between column 2 of the target and column 2 of the feature,
    appends ``<feature>=<value>`` to ``pFileName`` and frees the feature.
    A header line precedes the features and two newlines follow them.
    """
    emit = rScript.write
    features = config["features-" + pTargetVariableKey]
    appendToFile = 'cat(string_intercept,file="' + pFileName + '",sep="",append=TRUE)\n'

    emit('\nprint ("Section6: To Find Correlation For ' + pTargetVariableKey + '") \n')
    emit('string_intercept = paste("CorrelationCoeficient Of ","' + pTargetVariableKey
         + '" , ":- ","\\n",sep="")\n')
    emit(appendToFile)

    for feature in features:
        rawName = features[feature]
        # Name of the R object inside the .bin file: no brackets or parentheses.
        rObjectName = rawName
        for bracket in ('[', ']', '(', ')'):
            rObjectName = rObjectName.replace(bracket, '')
        # The file name keeps square brackets and drops only the parentheses.
        featureNameWithoutBrackets = (rawName.replace('(', '').replace(')', '')
                                      + attribute.generateExtension())
        rVariable = feature + pTargetVariableKey
        loadStatement = ' load(paste(file,"/f/' + featureNameWithoutBrackets + '.bin",sep=""))\n'
        dropStatement = ' rm("' + rObjectName + '")\n'

        emit('lFlag=FALSE\n')
        emit('for (file in lDirectorySet[[1]]){\n')
        emit(' if (!lFlag){\n')
        emit(loadStatement)
        emit(' ' + rVariable + '<-get("' + rObjectName + '")' + skipRowCode + ' \n')
        emit(dropStatement)
        emit(' lFlag=TRUE\n')
        emit(' }\n')
        emit(' else {\n')
        emit(loadStatement)
        emit(' temp<-get("' + rObjectName + '")' + skipRowCode + '\n')
        emit(dropStatement)
        emit(' ' + rVariable + '<-rbind(' + rVariable + ',temp)\n')
        emit(' rm(temp)\n')
        emit(' }\n')
        emit(' print ("Reading ' + featureNameWithoutBrackets + '.feature' + '") \n')
        emit('}\n')

        # The report line uses the untouched config value as the label.
        emit('tempCor <- cor(' + pTargetVariableKey + '[,2] , ' + rVariable + '[,2] )\n')
        emit('string_intercept = paste("' + rawName + '" ,"=",toString(tempCor),"\\n",sep="")\n')
        emit(appendToFile)
        emit('rm(' + rVariable + ')\n')

    emit('string_intercept = paste("\\n","\\n",sep="")\n')
    emit(appendToFile)
def getTargetValuesIntoDict(pTargetValuesDict):
    """Populate ``pTargetValuesDict`` with ``{'buy': ..., 'sell': ...}`` entries.

    The buy and sell target files named in ``config["target"]`` are read from
    ``<args.d with /ro/ -> /wf/>/t/`` via
    ``functionToReadTargetFileToDictionary``. One entry is created for every
    key of the buy dictionary.

    Raises:
        KeyError: if a key of the buy dictionary is absent from the sell
            dictionary.
    """
    dirName = args.d.replace('/ro/', '/wf/')
    # The explicit "/" covers both args.e = "ob/e1/" and args.e = "ob/e1".
    config = ConfigObj(args.e + "/design.ini")
    target = config["target"]
    fileHasHeader = True

    lTargetBuyValuesDict = dict()
    targetBuyValuesFileName = dirName + "/t/" + target['buy'] + attribute.generateExtension() + ".target"
    print("Buy Target values file : " + targetBuyValuesFileName)
    sys.stdout.flush()
    # "with" guarantees the handle is released; it used to stay open.
    with open(targetBuyValuesFileName) as targetBuyValuesFile:
        functionToReadTargetFileToDictionary(targetBuyValuesFile, lTargetBuyValuesDict, fileHasHeader)
    print("Finished reading the buy target values file")
    print("The number of elements in the buy target values dictionary is : " + str(len(lTargetBuyValuesDict)))
    sys.stdout.flush()

    lTargetSellValuesDict = dict()
    targetSellValuesFileName = dirName + "/t/" + target['sell'] + attribute.generateExtension() + ".target"
    print("Sell Target values file : " + targetSellValuesFileName)
    sys.stdout.flush()
    with open(targetSellValuesFileName) as targetSellValuesFile:
        functionToReadTargetFileToDictionary(targetSellValuesFile, lTargetSellValuesDict, fileHasHeader)
    print("Finished reading the sell target values file")
    print("The number of elements in the sell target values dictionary is : " + str(len(lTargetSellValuesDict)))
    sys.stdout.flush()

    # ----------------- Getting target values into dictionary -----------------
    for elements in lTargetBuyValuesDict:
        pTargetValuesDict[elements] = {}
        pTargetValuesDict[elements]['buy'] = lTargetBuyValuesDict[elements]
        pTargetValuesDict[elements]['sell'] = lTargetSellValuesDict[elements]
def getTrainCommandList(experimentFolder, algoName, trainFolder, pNumberOfDays, pWtsTaken):
    """Build the command lines for every generated training script.

    Each command is ``[scriptPath, "-d", dirs]``: ``scriptPath`` matches the
    train-script naming pattern inside ``experimentFolder`` and ``dirs`` is
    the ``;``-joined result of
    ``attribute.getListOfTrainingDirectoriesNames``.
    """
    patternParts = [
        experimentFolder, "/train", algoName,
        "-td.", os.path.basename(os.path.abspath(trainFolder)),
        "-dt.", pNumberOfDays,
        "-wt.", pWtsTaken,
        attribute.generateExtension(), "-For*.r",
    ]
    matchingScripts = glob.glob("".join(patternParts))

    dataDir = trainFolder.replace('/ro/', '/wf/')
    # NOTE(review): the day count and instrument come from the module-level
    # args, not from this function's parameters - verify that is intended.
    trainingDirs = attribute.getListOfTrainingDirectoriesNames(args.dt, dataDir, args.iT)
    joinedDirs = ";".join(trainingDirs)

    commandList = []
    for scriptPath in matchingScripts:
        commandList.append([scriptPath, "-d", joinedDirs])
    return commandList
def ForLoadingModel(rScript,args,path,pTargetVariableKey,config):
    """Emit R code that rebuilds the feature matrix and loads a saved model.

    For ``glmnet`` an ``X <- cbind(...)`` statement is written from column 2
    of every ``<featureKey><targetKey>`` variable. A ``load("...model")``
    statement for the model file in ``dirname(path)`` is then written.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source; the ``load`` statement is assumed to be emitted for every
    algorithm, not only glmnet - confirm.
    """
    features = config["features-"+pTargetVariableKey]
    if args.a == 'glmnet':
        rScript.write('print ("Section7: Running glmnet") \n')
        # Iterate the keys directly instead of indexing features.keys()[i],
        # which rebuilt the key list per feature and fails on dict views.
        columns = ','.join(feature + pTargetVariableKey + '[,2]' for feature in features)
        rScript.write('X <- cbind(' + columns + ')\n')
    predictionModel = (args.a + pTargetVariableKey
                       + '-td.' + os.path.basename(os.path.abspath(args.td))
                       + '-dt.' + args.dt
                       + '-targetClass.' + args.targetClass
                       + "-wt." + args.wt
                       + attribute.generateExtension() + '.model')
    rScript.write('load("' + os.path.dirname(path) + '/' + predictionModel + '")\n')
def ForLoadingModel(rScript, args, path, pTargetVariableKey, config):
    """Write the R statements that prepare ``X`` and load the trained model.

    When ``args.a`` is ``glmnet`` the feature matrix ``X`` is assembled with
    ``cbind`` from column 2 of each ``<featureKey><targetKey>`` variable.
    The model file name is derived from ``args`` and loaded from
    ``dirname(path)``.

    NOTE(review): indentation was lost in the source; the model ``load`` is
    assumed to sit outside the glmnet branch - confirm.
    """
    features = config["features-" + pTargetVariableKey]
    if args.a == 'glmnet':
        rScript.write('print ("Section7: Running glmnet") \n')
        # One column expression per feature key, in iteration order. The old
        # loop fetched features.keys()[i] on every pass, which is quadratic
        # and not indexable on Python 3 dict views.
        columnExpressions = [featureKey + pTargetVariableKey + '[,2]'
                             for featureKey in features]
        rScript.write('X <- cbind(')
        rScript.write(','.join(columnExpressions))
        rScript.write(')\n')

    modelNameParts = [
        args.a, pTargetVariableKey,
        '-td.', os.path.basename(os.path.abspath(args.td)),
        '-dt.', args.dt,
        '-targetClass.', args.targetClass,
        "-wt.", args.wt,
        attribute.generateExtension(), '.model',
    ]
    predictionModel = "".join(modelNameParts)
    rScript.write('load("' + os.path.dirname(path) + '/' + predictionModel + '")\n')
def ToReadFeatureFiles(rScript, config, targetVariable, pUseWhichArgumentForData=2):
    """Emit R code that loads every feature of ``targetVariable``.

    The directory list is taken from R's ``args[4]`` when
    ``pUseWhichArgumentForData`` is 4 and from ``args[2]`` otherwise. Each
    feature in ``config["features-<target>"]`` is loaded from
    ``<dir>/f/<name>.bin`` for every directory and row-bound into an R
    variable named ``<featureKey><targetVariable>``.
    """
    emit = rScript.write
    features = config["features-" + targetVariable]
    emit('\nprint ("Section3: Read feature files") \n')
    argumentIndex = '4' if pUseWhichArgumentForData == 4 else '2'
    emit('lDirectorySet<-strsplit(args[' + argumentIndex + '],";",fixed=TRUE,useBytes=FALSE)\n')

    for feature in features:
        # R object name: the config value without any brackets/parentheses.
        rObjectName = features[feature]
        for bracket in ('[', ']', '(', ')'):
            rObjectName = rObjectName.replace(bracket, '')
        # File name: only the parentheses are removed.
        featureNameWithoutBrackets = (features[feature].replace('(', '').replace(')', '')
                                      + attribute.generateExtension())
        rVariable = feature + targetVariable
        loadStatement = ' load(paste(file,"/f/' + featureNameWithoutBrackets + '.bin",sep=""))\n'
        dropStatement = ' rm("' + rObjectName + '")\n'

        emit('lFlag=FALSE\n')
        emit('for (file in lDirectorySet[[1]]){\n')
        emit(' if (!lFlag){\n')
        emit(loadStatement)
        emit(' ' + rVariable + '<-get("' + rObjectName + '")' + skipRowCode + ' \n')
        emit(dropStatement)
        emit(' lFlag=TRUE\n')
        emit(' }\n')
        emit(' else {\n')
        emit(loadStatement)
        emit(' temp<-get("' + rObjectName + '")' + skipRowCode + '\n')
        emit(dropStatement)
        emit(' ' + rVariable + '<-rbind(' + rVariable + ',temp)\n')
        emit(' rm(temp)\n')
        emit(' }\n')
        emit(' print ("Reading ' + featureNameWithoutBrackets + '.feature' + '") \n')
        emit('}\n')
def ToReadTargetFile(rScript, config):
    """Write the R section that reads all target files.

    Every key of ``config["target"]`` becomes an R variable holding the
    row-bound contents of ``<dir>/t/<value><ext>.bin`` across the directories
    in ``args[2]``; ``lengthOfEachDay`` collects the per-directory row counts.
    """
    rScript.write('print ("Section2: Read target files") \n')
    lTargetSet = config["target"]
    rScript.write('lDirectorySet<-strsplit(args[2],";",fixed=TRUE,useBytes=FALSE)\n')

    for target in lTargetSet:
        storedName = lTargetSet[target]
        # Square brackets are stripped to get the R object name.
        for bracket in ('[', ']'):
            storedName = storedName.replace(bracket, '')
        fileToRead = lTargetSet[target] + attribute.generateExtension()

        rLines = (
            'lengthOfEachDay = numeric()\n',
            'lFlag=FALSE\n',
            'for (file in lDirectorySet[[1]]){\n',
            ' if (!lFlag){\n',
            ' load(paste(file,"/t/' + fileToRead + '.bin",sep=""))\n',
            ' ' + target + '<- ' + storedName + skipRowCode + '\n',
            ' rm(' + storedName + ')\n',
            ' lengthOfEachDay = c(lengthOfEachDay,nrow(' + target + '))\n',
            ' lFlag=TRUE\n',
            ' }\n',
            ' else{\n',
            ' load(paste(file,"/t/' + fileToRead + '.bin",sep=""))\n',
            ' temp<-' + storedName + skipRowCode + '\n',
            ' rm(' + storedName + ')\n',
            ' lengthOfEachDay = c(lengthOfEachDay,nrow(temp))\n',
            ' ' + target + '<-rbind(' + target + ',temp)\n',
            ' rm(temp)\n',
            ' }\n',
            ' print ("Reading ' + fileToRead + '.target' + '") \n',
            '}\n',
        )
        for rLine in rLines:
            rScript.write(rLine)
def main():
    """Generate the executable R prediction script for one sub-experiment set.

    Parses the command line, reads ``<-e>/design.ini``, ensures the
    prediction output directory exists and writes
    ``predict<algo>-td...-For<sub>SubE.r`` next to the experiment folder.
    The script loads the features once and then, for every design file under
    ``-s``, emits the prediction code for each target whose ``.predictions``
    file is missing (or always, with ``-skipP no``).

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source; the trailing ``rm(list=ls())`` is assumed to be written once,
    after all design files.
    """
    parser = argparse.ArgumentParser(
        description='Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/')
    parser.add_argument('-e', required=True, help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-s', required=True,
                        help='Location of the subfolder that contains the sub experiments')
    parser.add_argument('-pd', required=True, help='Prediction directory')
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument(
        '-targetClass', required=True,
        help="For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument(
        '-skipP', required=False,
        help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-wt', required=False,
                        help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    args = parser.parse_args()

    # -wt is declared optional but is concatenated into every file name
    # below; report its absence clearly instead of failing with a TypeError.
    if args.wt is None:
        parser.error("-wt is needed to build the script and prediction file names")

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipP is None:
        args.skipP = "yes"

    # Single-argument print(...) behaves the same as the print statement.
    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e)
    algo = 'glmnet' if args.a is None else args.a
    args.s = args.s + "/"

    predictDataDirectoryName = args.pd.replace('/ro/', '/wf/')
    predictDataDirectoryName = (predictDataDirectoryName + "/p/"
                                + os.path.basename(os.path.dirname(args.e)))
    if not os.path.exists(predictDataDirectoryName):
        os.mkdir(predictDataDirectoryName)

    lTrainedOn = os.path.basename(os.path.abspath(args.td))
    rProgName = ("predict" + algo + "-td." + lTrainedOn + "-dt." + args.dt
                 + "-pd." + os.path.basename(os.path.abspath(args.pd))
                 + "-wt." + args.wt + attribute.generateExtension()
                 + "-For" + os.path.basename(os.path.dirname(args.s)) + "SubE.r")
    rProgLocation = dirName + '/' + rProgName

    # The context manager closes the script even if a generator step raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if args.a == 'glmnet':
            rScript.write('require (glmnet) \n')
        elif args.a == 'randomForest':
            rScript.write('require (randomForest) \n')
        rCodeGen.ForSetUpChecks(rScript)
        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript, config, target)
            rCodeGen.ForSanityChecks(rScript, config, target)

        designFiles = utility.list_files(args.s)
        for designFile in designFiles:
            print("Generating r code for " + designFile)
            rScript.write('\n\nprint ("Running r code for' + designFile + '")\n')
            config = ConfigObj(designFile)
            for target in config['target']:
                predictionFileName = (
                    predictDataDirectoryName + "/" + args.a + target
                    + '-td.' + lTrainedOn + '-dt.' + args.dt
                    + '-targetClass.' + args.targetClass
                    + '-f.' + os.path.basename(os.path.dirname(designFile))
                    + "-wt." + args.wt + attribute.generateExtension() + ".predictions")
                if not os.path.isfile(predictionFileName) or (args.skipP.lower() == "no"):
                    rCodeGen.ForPredictions(rScript, config, args, designFile, target)
                else:
                    print(predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")
        rScript.write('rm(list=ls())')

    print("Finished generating R prediction program: " + rProgLocation)
    os.system("chmod +x " + rProgLocation)
def main():
    """Sweep entry/exit confidence levels and run ``readOnceAndWrite`` for them.

    Loads the data matrix for ``args.pd`` and the predicted values, then
    steps a gap (starting at 0.2) while it is below a quarter of the entry
    range. Each pass derives four exit levels above ``totalExitCL[0]`` and
    four entry levels above the (possibly raised) lower entry bound.

    NOTE(review): indentation was reconstructed from a whitespace-collapsed
    source. The trailing ``else`` is assumed to pair with the
    ``if(exitCL < entryCL2 ...)`` test and the ``for`` loop is assumed to sit
    inside that ``if`` - confirm against the original file.
    """
    dataFile.getDataIntoMatrix(args.pd)
    predictedValuesDict = dict()
    getPredictedValuesIntoDict(predictedValuesDict)
    lower_entry = totalEntryCL[0]
    upper_entry = totalEntryCL[1]
    lower_exit = totalExitCL[0]
    l_index_gap = 0.2
    l_max_gap = (upper_entry - lower_entry)/(4)
    while l_index_gap < l_max_gap:
        # Four exit levels, one gap apart, above the lower exit bound.
        exitCL2 = lower_exit + float(l_index_gap)
        exitCL1 = lower_exit + 2*float(l_index_gap)
        exitCL0 = lower_exit + 3*float(l_index_gap)
        exitCL = lower_exit + 4*float(l_index_gap)
        # Keep the entry levels from starting below the highest exit level.
        if(lower_entry < exitCL):
            lower_entry = lower_exit + 4*float(l_index_gap)
        # Four entry levels, one gap apart, above the lower entry bound.
        entryCL2 = lower_entry + float(l_index_gap)
        entryCL1 = lower_entry + 2*float(l_index_gap)
        entryCL0 = lower_entry + 3*float(l_index_gap)
        entryCL = lower_entry + 4*float(l_index_gap)
        l_entry_list = [entryCL2,entryCL1,entryCL0 , entryCL]
        l_exit_list = [exitCL2,exitCL1,exitCL0 , exitCL]
        print("Entry Exit list" + str(l_entry_list)+ str(l_exit_list))
        if(exitCL < entryCL2 and entryCL <= upper_entry):
            # NOTE(review): the *CL values are numbers (they are sums with
            # float(...)), so concatenating them to str below raises
            # TypeError - they probably need str(); confirm the intended
            # number formatting.
            lInitialFileName = args.a + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
                '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + \
                '-l.'+entryCL+"-"+entryCL0+"-"+entryCL1+"-"+entryCL2+"-"+exitCL +"-"+exitCL0 +"-"+exitCL1 +"-"+exitCL2 + "-tq." + args.orderQty + "-te.10"
            initialFileName.append(lInitialFileName)
            # initialFileName is not defined in this function, so every name
            # accumulated so far is processed with the current levels.
            for lFileName in initialFileName:
                readOnceAndWrite(lFileName, l_entry_list, l_exit_list, predictedValuesDict)
        else:
            # NOTE(review): the gap only advances on this branch; with the
            # nesting assumed here the loop appears not to terminate once
            # the condition above holds - confirm.
            l_index_gap = l_index_gap + 0.1
# Module-level trading state derived from the command-line arguments.

# Entry and exit confidence levels, given as ";"-separated lists; the two
# lists are paired by index below.
gEntryCLList = args.entryCL.split(";")
gExitCLList = args.exitCL.split(";")
gStandingAtAskPMinusOneTickInCloseSell = 0
gStandingAtBidPPlusOneTickInCloseBuy = 0
gPipsTaken = int(args.pipTaken)
gOpenBuyFillPrice = 0
gOpenSellFillPrice = 0

# One output file-name stem per (entry, exit) confidence-level pair.
initialFileName = []
for indexOfCL in range(0,len(gEntryCLList)):
    # Raises IndexError if gExitCLList is shorter than gEntryCLList.
    lInitialFileName = args.a + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
        '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ "-pipTaken." + args.pipTaken+ attribute.generateExtension() + \
        '-l.'+gEntryCLList[indexOfCL]+"-"+gExitCLList[indexOfCL] + "-tq." + args.orderQty + "-te.13"
    initialFileName.append(lInitialFileName)

# Despite the "list" in their names these four globals start as empty dicts.
g_quantity_adjustment_list_for_sell = {}
g_quantity_adjustment_list_for_buy = {}
g_bestqty_list_for_sell = {}
g_bestqty_list_for_buy = {}


class ticks_values_to_be_stored(object):
    """Plain record of the fields kept for one tick.

    NOTE(review): field meanings are inferred from their names only
    (message code, order type, new/old price and quantity) - confirm.
    """

    def __init__(self):
        # Every field starts at an empty/zero placeholder.
        self.MsgCode = ''
        self.OrderType = ''
        self.NewP = 0.0
        self.NewQ = 0
        self.OldP = 0.0
def main():
    """Generate the executable R script that trains the decision trees.

    Parses the command line, reads ``<-e>/design1.ini`` (features/targets)
    and ``<-e>/design.ini`` (passed to ``ToReadPredictionFiles``) and writes
    ``traintree-td...-wt...<ext>.r`` next to the experiment folder. Training
    code is emitted only when at least one target's ``.tree`` file is missing
    or ``-skipT`` is not ``yes``.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source; the per-target loop is assumed to sit inside the
    "not all files present" branch - confirm.
    """
    parser = argparse.ArgumentParser(
        description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,
                        help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument(
        '-targetClass', required=True,
        help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument(
        '-skipT', required=False,
        help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True,
                        help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-treeType', required=False, help="Tree read for trade engine")
    parser.add_argument('-tTD', required=False, help="Tree number of days to be used")
    args = parser.parse_args()
    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipT is None:
        args.skipT = "yes"
    if args.tTD is None:
        args.tTD = args.dt

    # Single-argument print(...) behaves the same as the print statement.
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.e + "/design1.ini")
    # Fixed: the separator was missing here, so "-e ob/e1" pointed at
    # "ob/e1design.ini" while design1.ini above was found correctly.
    configInit = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"
    algo = rCodeGen.getAlgoName(args)
    lTrainedOn = os.path.basename(os.path.abspath(args.td))
    rProgName = ("traintree" + "-td." + lTrainedOn + "-tTD" + args.tTD
                 + "-dt." + args.dt + "-wt." + args.wt
                 + attribute.generateExtension() + ".r")
    rProgLocation = dirName + '/' + rProgName

    def _treeFileName(target):
        # Path of the saved tree for one target.
        # Raises TypeError if -treeType was not supplied (it is optional).
        return (dirName + "/" + algo + target + '-td.' + lTrainedOn
                + "-tTD" + args.tTD + '-dt.' + args.dt
                + attribute.generateExtension() + ".tree" + args.treeType)

    def _canSkip(target):
        # A target is skipped when its tree exists and -skipT is "yes".
        return os.path.isfile(_treeFileName(target)) and (args.skipT.lower() == "yes")

    # The context manager closes the script even if a generator step raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        rScript.write('require (rpart) \n')
        rCodeGen.ForSetUpChecks(rScript)

        lAllFilePresent = all(_canSkip(target) for target in config['target'])
        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript, config)
            rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ToReadPredictionFiles(rScript, config, target, configInit)
                rCodeGen.ForSanityChecks(rScript, config, target)
                lTreeFileName = _treeFileName(target)
                if _canSkip(target):
                    print("Model File " + lTreeFileName + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipT=No")
                else:
                    rCodeGen.ToRenameDataBeforeTraining(rScript, config, target)
                    rCodeGen.ForTrainingTree(rScript, args, config, target, args.treeType)
                    print(lTreeFileName)
                    rCodeGen.saveTrainingTree(rScript, args, dirName, target, lTreeFileName)

    print("Finished generating R training program: " + rProgLocation)
    os.system("chmod +x " + rProgLocation)
def getPredictedValuesIntoDict(pPredictedValuesDict):
    """Read the ``.predictions`` file into ``pPredictedValuesDict``.

    The file is looked up under ``<args.pd with /ro/ -> /wf/>/p/`` using the
    first key of ``config["target"]``. The first line is skipped as a header;
    every other line is split on ``,`` into at most three fields, of which
    field 1 is the float timestamp (key) and field 2 the predicted
    probability (value, 0 when missing or unparsable).

    If two lines share a timestamp the count of rejected duplicates is
    printed and the process exits with ``os._exit(-1)``.
    """
    dirName = args.pd.replace('/ro/', '/wf/')
    # The explicit "/" covers both args.e = "ob/e1/" and args.e = "ob/e1".
    config = ConfigObj(args.e + "/design.ini")
    target = config["target"]
    predictedValuesFileName = (
        dirName + "/p/" + mainExperimentName + "/" + args.a + target.keys()[0]
        + '-td.' + os.path.basename(os.path.abspath(args.td))
        + '-dt.' + args.dt + '-targetClass.' + args.targetClass
        + '-f.' + experimentName + "-wt." + args.wt
        + attribute.generateExtension() + ".predictions")
    print("Predicted values file : " + predictedValuesFileName)
    sys.stdout.flush()

    numberOfLinesInPredictedValuesFile = 0
    # "with" releases the handle; it used to stay open.
    with open(predictedValuesFileName) as predictedValuesFile:
        next(predictedValuesFile, None)  # skip the header line, if any
        for line in predictedValuesFile:
            splitLine = line.rstrip('\n').split(',', 2)
            timeStamp = float(splitLine[1])
            try:
                # TODO: remove this fallback and run the code to identify errors.
                predictedProb = float(splitLine[2])
            except (ValueError, IndexError):
                # Only a missing or malformed probability falls back to 0;
                # anything else (e.g. KeyboardInterrupt) now propagates.
                predictedProb = 0
            pPredictedValuesDict[timeStamp] = predictedProb
            numberOfLinesInPredictedValuesFile += 1

    print("Finished reading the predicted values file")
    print("The number of elements in the predicted values dictionary is : "
          + str(len(pPredictedValuesDict)))
    if numberOfLinesInPredictedValuesFile != len(pPredictedValuesDict):
        print("Number of duplicate timestamps rejected = "
              + str(numberOfLinesInPredictedValuesFile - len(pPredictedValuesDict)))
        # os._exit() skips the normal interpreter shutdown and does not flush
        # stdio buffers, so flush first or this message can be lost.
        sys.stdout.flush()
        os._exit(-1)
    sys.stdout.flush()
def ForPredictions(rScript, config, args, pathToDesignFile, pTargetVariableKey, pUseWhichArgumentForData=2, pDouble=""):
    """Write the R statements that load a saved model, predict and save the result.

    rScript                  -- object with a write() method receiving the R code.
    config                   -- mapping with a "features-<pTargetVariableKey>" section.
    args                     -- parsed command line; a, td, dt, targetClass, wt and e are read.
    pathToDesignFile         -- its directory holds the model file; the directory's
                                base name is embedded in the predictions file name.
    pTargetVariableKey       -- target key; appended to every feature variable name.
    pUseWhichArgumentForData -- 4 makes the R code use args[4] as output root,
                                any other value makes it use args[2].
    pDouble                  -- non-empty adds "double" in front of the
                                ".model" / ".predictions" extensions.

    For an unsupported args.a a message is printed and the process is ended
    with os._exit(-1); by then the Section6 load statements are already written.
    """
    emit = rScript.write
    features = config["features-" + pTargetVariableKey]
    # Feature variables carry the target key as suffix so that training and
    # prediction data do not clash when both run in the same R script.
    algo = getAlgoName(args)
    suffix = 'double' if len(pDouble) != 0 else ''
    trainingDay = os.path.basename(os.path.abspath(args.td))
    designDir = os.path.dirname(pathToDesignFile)

    predictionModel = (algo + pTargetVariableKey + '-td.' + trainingDay
                       + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                       + "-wt." + args.wt + attribute.generateExtension()
                       + suffix + '.model')
    modelPath = designDir + '/' + predictionModel
    emit('\nprint ("Section6: Read in prediction model' + modelPath + '") \n')
    emit('load("' + modelPath + '")')

    # args.a -> (Section7 banner, frame opener, label the columns?,
    #            name printed in Section8, R predict call)
    recipes = {
        'glmnet': ('\n\nprint ("Section7: Creating data frame") \n',
                   'df = cbind(', False, args.a,
                   'Prob <- predict (fit, newx = df,s = "lambda.min",type = "response")'),
        'logitr': ('\n\nprint ("Section7: Creating the data frame") \n',
                   'df = data.frame(', True, args.a,
                   'Prob<- predict (fit, newdata = df, type = "response")'),
        'randomForest': ('\n\nprint ("Section7: Creating the data frame") \n',
                         'df = data.frame(', True, args.a,
                         'Prob<- predict (fit, df)'),
        'mda': ('\n\nprint ("Section7: Creating the data frame") \n',
                'df = data.frame(', True, algo,
                'Prob<- predict (fit, df)'),
    }
    recipe = recipes.get(args.a)
    if recipe is None:
        print("The only valid options are glmnet, logitr, randomForest or mda")
        os._exit(-1)
    else:
        banner, opener, labelled, shownName, predictCall = recipe
        columns = []
        for feature in features:
            column = feature + pTargetVariableKey + '[,2]'
            if labelled:
                # Brackets and parentheses are stripped from the configured
                # feature name before it is used as the column label.
                label = features[feature]
                for unwanted in '[]()':
                    label = label.replace(unwanted, '')
                column = label + '=' + column
            columns.append(column)
        emit(banner)
        emit(opener)
        emit(','.join(columns))
        emit(")\n\n")
        emit('print ("Section8: Running ' + shownName + ' prediction") \n')
        emit(predictCall)
        emit("\n\n")

    def predictionsName():
        # Path of the predictions file relative to "<data root>/p/".
        return (os.path.basename(os.path.dirname(args.e)) + '/' + args.a
                + pTargetVariableKey + '-td.' + trainingDay
                + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                + '-f.' + os.path.basename(designDir)
                + "-wt." + args.wt + attribute.generateExtension()
                + suffix + '.predictions')

    emit('\nprint ("Section9: Creating the data frame to write in the file") \n')
    emit('dfForFile <- data.frame(' + features.keys()[0] + pTargetVariableKey + '[,1]) \n')
    emit('\nprint ("Section10: Putting the probabilities in the data frame") \n')
    emit('dfForFile <- cbind(dfForFile,Prob) \n')
    emit('\nprint ("Section11: Saving the predictions in file /p/' + predictionsName() + '") \n')

    dataRoot = 'args[4]' if pUseWhichArgumentForData == 4 else 'args[2]'
    emit('fileName = paste(' + dataRoot + ',"/p/","' + predictionsName() + '",sep="") \n')
    emit('print (fileName) \n')
    emit('write.table(format(dfForFile,digits=16), file = fileName,sep=",",quote=FALSE)\n')
def scriptWrapperForPredictProgramRun(predictionDirAfterLastTD):
    """Run the generated predict R script for one prediction directory.

    The script path is built from the module-level ``lExperimentFolderName``
    and ``args`` (a, td, dt, wt) plus the base name of
    ``predictionDirAfterLastTD``.  The script is handed to
    ``utility.runCommand`` with "-d" and the prediction directory in which
    '/ro/' has been replaced by '/wf/'.
    """
    nameParts = [
        lExperimentFolderName, "/predict", args.a,
        "-td.", os.path.basename(os.path.abspath(args.td)),
        "-dt.", args.dt,
        "-pd.", os.path.basename(os.path.abspath(predictionDirAfterLastTD)),
        "-wt.", args.wt,
        attribute.generateExtension(),
        ".r",
    ]
    rScriptPath = "".join(nameParts)
    workingDir = predictionDirAfterLastTD.replace('/ro/', '/wf/')
    utility.runCommand([rScriptPath, "-d", workingDir], args.run, args.sequence)
def ForPredictions(rScript,config,args,pathToDesignFile,pTargetVariableKey,pUseWhichArgumentForData=2,pDouble=""):
    """Emit R code that loads the trained model, predicts and writes the predictions.

    The feature list comes from config["features-<pTargetVariableKey>"].  The
    model is loaded from the directory of pathToDesignFile; the predictions
    file name is written relative to R's args[4] when
    pUseWhichArgumentForData is 4 and relative to args[2] otherwise.  A
    non-empty pDouble inserts "double" before the ".model" and ".predictions"
    extensions.  Supported values of args.a are glmnet, logitr, randomForest
    and mda; anything else prints a message and calls os._exit(-1).
    """
    features = config["features-"+pTargetVariableKey]
    # Every feature variable is suffixed with the target key so that training
    # and prediction data sets do not conflict inside one R script.
    algo = getAlgoName(args)
    isDouble = len(pDouble) != 0
    dayAndClass = ('-td.' + os.path.basename(os.path.abspath(args.td))
                   + '-dt.' + args.dt + '-targetClass.' + args.targetClass)

    def nameTail(extension):
        # Weight type, instrument extension, optional "double", file extension.
        tail = "-wt." + args.wt + attribute.generateExtension()
        if isDouble:
            tail = tail + 'double'
        return tail + extension

    def writeFrame(banner, opener, withLabels):
        # One "[,2]" column per feature, comma separated, closed by ")".
        rScript.write(banner)
        rScript.write(opener)
        stillToCome = len(features)
        for feature in features:
            column = feature + pTargetVariableKey + '[,2]'
            if withLabels:
                label = features[feature]
                for bracket in ('[', ']', '(', ')'):
                    label = label.replace(bracket, '')
                column = label + '=' + column
            rScript.write(column)
            stillToCome -= 1
            if stillToCome > 0:
                rScript.write(',')
        rScript.write(")\n\n")

    def writePredict(shownName, predictCall):
        rScript.write('print ("Section8: Running ' + shownName + ' prediction") \n')
        rScript.write(predictCall)
        rScript.write("\n\n")

    def predictionsFile():
        return (os.path.basename(os.path.dirname(args.e)) + '/' + args.a + pTargetVariableKey
                + dayAndClass + '-f.' + os.path.basename(os.path.dirname(pathToDesignFile))
                + nameTail('.predictions'))

    predictionModel = algo + pTargetVariableKey + dayAndClass + nameTail('.model')
    modelLocation = os.path.dirname(pathToDesignFile) + '/' + predictionModel
    rScript.write('\nprint ("Section6: Read in prediction model' + modelLocation + '") \n')
    rScript.write('load("' + modelLocation + '")')

    dataFrameBanner = '\n\nprint ("Section7: Creating the data frame") \n'
    if args.a == 'glmnet':
        writeFrame('\n\nprint ("Section7: Creating data frame") \n', 'df = cbind(', False)
        writePredict(args.a, 'Prob <- predict (fit, newx = df,s = "lambda.min",type = "response")')
    elif args.a == 'logitr':
        writeFrame(dataFrameBanner, 'df = data.frame(', True)
        writePredict(args.a, 'Prob<- predict (fit, newdata = df, type = "response")')
    elif args.a == 'randomForest':
        writeFrame(dataFrameBanner, 'df = data.frame(', True)
        writePredict(args.a, 'Prob<- predict (fit, df)')
    elif args.a == 'mda':
        writeFrame(dataFrameBanner, 'df = data.frame(', True)
        writePredict(algo, 'Prob<- predict (fit, df)')
    else:
        print("The only valid options are glmnet, logitr, randomForest or mda")
        os._exit(-1)

    rScript.write('\nprint ("Section9: Creating the data frame to write in the file") \n')
    rScript.write('dfForFile <- data.frame(' + features.keys()[0] + pTargetVariableKey + '[,1]) \n')
    rScript.write('\nprint ("Section10: Putting the probabilities in the data frame") \n')
    rScript.write('dfForFile <- cbind(dfForFile,Prob) \n')
    rScript.write('\nprint ("Section11: Saving the predictions in file /p/' + predictionsFile() + '") \n')

    if pUseWhichArgumentForData == 4:
        rootArgument = 'args[4]'
    else:
        rootArgument = 'args[2]'
    rScript.write('fileName = paste(' + rootArgument + ',"/p/","' + predictionsFile() + '",sep="") \n')
    rScript.write('print (fileName) \n')
    rScript.write('write.table(format(dfForFile,digits=16), file = fileName,sep=",",quote=FALSE)\n')
def main():
    """Generate the executable "traintree-...r" R script for an experiment folder.

    Command line options are parsed with argparse; -skipT defaults to "yes"
    and -tTD defaults to the value of -dt.  The features and targets are read
    from <e>/design1.ini and the initial configuration from <e>/design.ini.
    The training code is only written when at least one target's tree file is
    missing or -skipT is not "yes"; inside that, targets whose tree file exists
    (with -skipT "yes") get their data-reading code but no training code.
    Finally the generated script is made executable.
    """
    import subprocess  # local import: only needed for the final chmod

    parser = argparse.ArgumentParser(
        description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,
                        help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-targetClass', required=True,
                        help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipT', required=False,
                        help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True,
                        help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-treeType', required=False, help="Tree read for trade engine")
    parser.add_argument('-tTD', required=False, help="Tree number of days to be used")
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipT is None:
        args.skipT = "yes"
    if args.tTD is None:
        args.tTD = args.dt

    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.e + "/design1.ini")
    # os.path.join supplies the separator when args.e has no trailing slash
    # (plain concatenation produced e.g. "ob/e1design.ini").
    configInit = ConfigObj(os.path.join(args.e, "design.ini"))
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"
    algo = rCodeGen.getAlgoName(args)
    trainingDay = os.path.basename(os.path.abspath(args.td))
    rProgName = ("traintree" + "-td." + trainingDay + "-tTD" + args.tTD
                 + "-dt." + args.dt + "-wt." + args.wt
                 + attribute.generateExtension() + ".r")
    rProgLocation = dirName + '/' + rProgName
    skipExisting = args.skipT.lower() == "yes"

    def treeFileNameFor(target):
        # Name of the tree file that training produces for one target.
        return (dirName + "/" + algo + target + '-td.' + trainingDay
                + "-tTD" + args.tTD + '-dt.' + args.dt
                + attribute.generateExtension() + ".tree" + args.treeType)

    # The context manager closes the script even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        rScript.write('require (rpart) \n')
        rCodeGen.ForSetUpChecks(rScript)

        # Stops at the first target whose tree file has to be (re)built.
        lAllFilePresent = all(
            os.path.isfile(treeFileNameFor(target)) and skipExisting
            for target in config['target'])

        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript, config)
            rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ToReadPredictionFiles(rScript, config, target, configInit)
                rCodeGen.ForSanityChecks(rScript, config, target)
                lTreeFileName = treeFileNameFor(target)
                if os.path.isfile(lTreeFileName) and skipExisting:
                    print("Model File " + lTreeFileName + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipT=No")
                else:
                    rCodeGen.ToRenameDataBeforeTraining(rScript, config, target)
                    rCodeGen.ForTrainingTree(rScript, args, config, target, args.treeType)
                    print(lTreeFileName)
                    rCodeGen.saveTrainingTree(rScript, args, dirName, target, lTreeFileName)

    print("Finished generating R training program: " + rProgLocation)
    # Argument list instead of a shell string: the path reaches chmod verbatim,
    # so spaces or shell metacharacters in it cannot break the command.
    subprocess.call(["chmod", "+x", rProgLocation])
pLPerLotShort=(averageOpenSellPrice - averageCloseBuyPrice)* 1000 pLPerLotLong=(averageCloseSellPrice - averageOpenBuyPrice)* 1000 print("1 lot has 1000 qty's so P/L Short per lot is: " + str(pLPerLotShort), file = outputFile) print("1 lot has 1000 qty's so P/L Long per lot is: " + str(pLPerLotLong), file = outputFile) print("P/L for Short trading 10 lots is: " + str(pLPerLotShort * 10), file = outputFile) print("P/L for Long trading 10 lots is: " + str(pLPerLotLong * 10), file = outputFile) if __name__ == "__main__": tStart = datetime.now() dirName = args.pd.replace('/ro/','/rs/') checkAllFilesAreExistOrNot = 'false' lWFDirName = args.pd.replace('/ro/','/wf/') predictedBuyValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'buy' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + \ args.dt + '-targetClass.' + args.targetClass + '-f.' + buyExperimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions" predictedSellValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'sell' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' +\ args.dt + '-targetClass.' + args.targetClass + '-f.' + sellExperimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions" lEntryClList = args.entryCL.split(";") lExitClList = args.exitCL.split(";") if len(lEntryClList)!= len(lExitClList): print("Len of entry and exit list does match. Entry List length = " , len(lEntryClList) , " and ExitCL List length = " , len(lExitClList)) os._exit(-1) lengthOfList = len(lEntryClList) lMinOfExitCl = 9999.000 fileNameList = [] finalEntryClList = [] finalExitClList = []
# Remaining command line options; `parser` is created earlier in the file.
parser.add_argument('-sequence', required=True,help='lp / dp / serial')
parser.add_argument('-dt',required=False,help='No of day from start for which it is to be trained ')
parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
parser.add_argument('-iT',required=False,help='Instrument name')
parser.add_argument('-sP',required=False,help='Strike price of instrument')
parser.add_argument('-oT',required=False,help='Options Type')
args = parser.parse_args()
attribute.initializeInstDetails(args.iT,args.sP,args.oT)
# The algorithm defaults to glmnet when -a is not given.
if args.a is not None:
    algo = args.a
else:
    algo = 'glmnet'
# The number of training days defaults to "1"; it stays a string because it is
# concatenated into the script names below.
if args.dt == None:
    args.dt = "1"
# --- Training step ---
# The '/ro/' part of the training directory is replaced by '/wf/'.
dirName = args.td.replace('/ro/','/wf/')
scriptName = args.e+"/train" + algo + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt + "-wt." + args.wt + attribute.generateExtension() +".r"
trainingDataList = attribute.getListOfTrainingDirectoriesNames(args.dt,dirName,args.iT)
# The training directories are passed to the script as one ';'-separated value.
trainingDataListString = ";".join(trainingDataList)
# NOTE(review): args.run / args.sequence presumably choose whether and how the
# command is executed -- confirm against utility.runCommand.
utility.runCommand([scriptName,"-d",trainingDataListString],args.run,args.sequence)
# --- Prediction step ---
# Same '/ro/' -> '/wf/' replacement, this time for the prediction directory.
dirName = args.pd.replace('/ro/','/wf/')
scriptName=args.e+"/predict" + algo + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt +\
"-pd." + os.path.basename(os.path.abspath(args.pd)) + "-wt." + args.wt + attribute.generateExtension() +".r"
utility.runCommand([scriptName,"-d",dirName],args.run,args.sequence)
else: mainExperimentName = pathAfterE mainExperimentNameList.append(mainExperimentName) experimentName = os.path.basename(absPathOfExperimentName) sys.path.append("./src/") sys.path.append("./ob/generators/") config = ConfigObj(experiment + "/design.ini") featureTargetFilePath = args.pd.replace('ro', 'wf') for feature in config["features-buy"]: lName = config["features-buy"][feature].replace('(', '').replace(')', '') if lName not in featureNames: lFeatureFile = featureTargetFilePath + "/f/" + lName + attribute.generateExtension( ) + ".feature" featureFP = open(lFeatureFile, "rb") featureFpList.append(featureFP) featureNames.append(lName) for feature in config["features-sell"]: lName = config["features-sell"][feature].replace('(', '').replace(')', '') if lName not in featureNames: lFeatureFile = featureTargetFilePath + "/f/" + lName + attribute.generateExtension( ) + ".feature" featureFP = open(lFeatureFile, "rb") featureFpList.append(featureFP) featureNames.append(lName) dirName = args.pd.replace('/ro/', '/wf/')
required=True, help="default/exp , weight type to be given to different days") parser.add_argument('-iT', required=False, help='Instrument name') parser.add_argument('-sP', required=False, help='Strike price of instrument') parser.add_argument('-oT', required=False, help='Options Type') args = parser.parse_args() attribute.initializeInstDetails(args.iT, args.sP, args.oT) if args.a is not None: algo = args.a else: algo = 'glmnet' if args.dt == None: args.dt = "1" dirName = args.td.replace('/ro/', '/wf/') scriptName = args.e + "/train" + algo + "-td." + os.path.basename( os.path.abspath(args.td) ) + "-dt." + args.dt + "-wt." + args.wt + attribute.generateExtension() + ".r" trainingDataList = attribute.getListOfTrainingDirectoriesNames( args.dt, dirName, args.iT) trainingDataListString = ";".join(trainingDataList) utility.runCommand([scriptName, "-d", trainingDataListString], args.run, args.sequence) dirName = args.pd.replace('/ro/', '/wf/') scriptName=args.e+"/predict" + algo + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt +\ "-pd." + os.path.basename(os.path.abspath(args.pd)) + "-wt." + args.wt + attribute.generateExtension() +".r" utility.runCommand([scriptName, "-d", dirName], args.run, args.sequence)
if len(args.nodes) == 0: for target in ['buy', 'sell']: lTreeFileName = "/home/vikas/ml/ob/e/nsecur/ABAll_AmBRAmBAll/s/2c/AmBRAmB//glmnet" + target + "-td.20140821-tTD30-dt.10.tree1" #args.e+"/"+args.a+ target+'-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + args.dt + attribute.generateExtension() +".tree" + args.treeType dd.gGlobalTree[target], lVariable = reading_tree.reading_tree( lTreeFileName, args.treeType) dd.gTreeVariablesPresent = dd.gTreeVariablesPresent + lVariable dd.gFinalCondition[target]["0"] = '' dd.gFinalCondition[target]["0"] = reading_tree.traverse_tree( 1, args.treeType, 0.0, dd.gGlobalTree[target], dd.gFinalCondition[target]["0"]) print("Calling tree traversal ") #,dd.gFinalCondition[target]["0"]) else: for target in ['buy', 'sell']: lTreeFileName = args.e + "/" + args.a + target + '-td.' + os.path.basename( os.path.abspath( args.td)) + '-dt.' + args.dt + attribute.generateExtension( ) + ".tree" + args.treeType dd.gGlobalTree[target], lVariable = reading_tree.reading_tree( lTreeFileName, args.treeType) dd.gTreeVariablesPresent = dd.gTreeVariablesPresent + lVariable nodes = args.nodes.split(";") dd.gFinalCondition[target]['nodes'] = reading_tree.traverse_nodes( args.treeType, nodes, dd.gGlobalTree[target]) config = ConfigObj(args.e + "/design1.ini") lListOfPredictionDirectory = attribute.getListOfTrainingDirectoriesNames( int(args.nPD), args.pd, args.iT) lBuyOutputFileObject = args.e + "/Buy-OutOfSampleTree-" + '-pd.' + os.path.basename( os.path.abspath( args.pd)) + '-nPD.' + args.nPD + attribute.generateExtension( ) + ".tree" + args.treeType lSellOutputFileObject = args.e + "/Sell-OutOfSampleTree-" + '-pd.' + os.path.basename(
if 'nsecur' in absPathOfExperimentName: pathAfterE = absPathOfExperimentName[absPathOfExperimentName.index("/nsecur/")+8:] elif 'nsefut' in absPathOfExperimentName: pathAfterE = absPathOfExperimentName[absPathOfExperimentName.index("/nsefut/")+8:] elif 'nseopt' in absPathOfExperimentName: pathAfterE = absPathOfExperimentName[absPathOfExperimentName.index("/nseopt/")+8:] if "/" in pathAfterE: mainExperimentName = pathAfterE[:pathAfterE.index("/")] else: mainExperimentName = pathAfterE experimentName = os.path.basename(absPathOfExperimentName) initialFileName = args.a + '-td.' + os.path.basename(os.path.abspath(args.td)) + \ '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + \ '-l.'+args.entryCL+"-"+args.exitCL + "-te3" gTickSize = args.tickSize def getPredictedValuesIntoDict(pPredictedValuesDict): # The following will take care if args.e = "ob/e1/" or args.e = "ob/e1" dirName = args.pd.replace('/ro/','/wf/') config = ConfigObj(args.e+"/design.ini") target = config["target"] predictedValuesFileName = dirName+"/p/"+mainExperimentName+"/"+args.a + target.keys()[0] + '-td.' + os.path.basename(os.path.abspath(args.td)) + \ '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions" print("Predicted values file : "+ predictedValuesFileName) sys.stdout.flush() predictedValuesFile = open(predictedValuesFileName) fileHasHeader = True numberOfLinesInPredictedValuesFile = 0
lTreeTrainingList = [] for i in range(len(allDataDirectories)-int(args.dt)): args.td = allDataDirectories[i] predictionDirLastTD = allDataDirectories[i + int(args.dt) - 1] predictionDirAfterLastTD = allDataDirectories[i + int(args.dt)] lRCodeGenCommandList.append(["mRGenForE.py","-e",lExperimentFolderName,"-a",algo,"-targetClass",args.targetClass,"-skipM",args.skipM,"-td",args.td, "-dt" , \ args.dt , '-wt' , wt,"-iT",args.iT,"-oT",args.oT,"-sP",args.sP ,'-double', args.double]) # lRCodeGenCommandList.append(["pRGenForE.py","-e",args.e,"-s",lExperimentFolderName,"-a",algo,"-skipP",args.skipP,"-td",args.td , "-pd" , predictionDirLastTD , \ # "-dt" , args.dt , "-targetClass" , args.targetClass , '-wt' , wt,"-iT",args.iT,"-oT",args.oT,"-sP",args.sP,'-double', args.double]) lRCodeGenCommandList.append(["pRGenForE.py","-e",args.e,"-s",lExperimentFolderName,"-a",algo,"-skipP",args.skipP,"-td",args.td , "-pd" , predictionDirAfterLastTD ,\ "-dt" , args.dt , "-targetClass" , args.targetClass , '-wt' , wt,"-iT",args.iT,"-oT",args.oT,"-sP",args.sP,'-double', args.double]) dirName = args.td.replace('/ro/','/wf/') if args.double: scriptName = lExperimentFolderName+"/train" + algo + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt + "-wt." + wt + attribute.generateExtension() +"double.r" else: scriptName = lExperimentFolderName+"/train" + algo + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt + "-wt." + wt + attribute.generateExtension() +".r" trainingDataCorrespondingDateList = [] trainingDataList = [] #";".join(allDataDirectories[i:i+ int(args.dt) ]) lCount = i treeDataList = [] for trainDirs in allDataDirectories[i:i+ int(args.dt)]: trainingDataList.append(trainDirs.replace('/ro/','/wf/')) try: allDataDirectories[i+ int(args.tTD)] for treeDirs in allDataDirectories[i:i+ int(args.tTD)]:
#results = map(scriptWrapperForFeatureGeneration,allDataDirectories) pass #==========R Code formation to find correlation between features and target file ==============================0 utility.runCommand(["corrRGenForEForAllDays.py","-e",l_exp_dir,"-td",args.td,"-dt",args.dt,"-iT",args.iT,"-oT",args.oT,"-sP",args.sP],args.run,args.sequence) if args.sequence=="dp": print dp.printGroupStatus() #========Running the correlation R program========================= allWorkingFileDirectories = attribute.getListOfTrainingDirectoriesNames( int(args.nDays) , args.td.replace('/ro/','/wf/') ,args.iT) allWorkingFileDirectoriesString = ";".join(allWorkingFileDirectories) lCorrCommandList = [] if args.sequence == "dp": for l_training_day in allWorkingFileDirectories: lDate = os.path.basename(os.path.abspath(l_training_day)) lFileName = l_exp_dir + "/corr-date-" + lDate + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt + attribute.generateExtension() +".r" lCorrCommandList.append([lFileName,'-d',l_training_day]) utility.runListOfCommandsWithMaxUtlilizationOfWorkers(lCorrCommandList,args,"Day-wise Correlation",int(args.nComputers)) else: def scriptWrapperForDayWiseCorrelation(pTrainingDay): lDate = os.path.basename(os.path.abspath(pTrainingDay)) lFileName = l_exp_dir + "/corr-date-" + lDate + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt + attribute.generateExtension() +".r" utility.runCommand([lFileName,'-d',pTrainingDay],args.run,args.sequence) results = map(scriptWrapperForDayWiseCorrelation,allWorkingFileDirectories) summary_file_name = l_exp_dir + '/correlation-coef' + '-td.' + os.path.basename(os.path.abspath(args.td))+ '-dt.' + args.dt + attribute.generateExtension() + ".coef" wfo = open(summary_file_name, 'w') lBuyDict = {} lSellDict = {} lDayWiseBuy = {}
def main():
    """Write one executable R script that trains and predicts for every sub-experiment.

    Command-line options are parsed with argparse.  The script is written to
    ``dirname(args.e)`` under a name starting with ``trainPredict`` and ending
    with ``SubE.r``; its contents are emitted through the ``rCodeGen`` helpers.
    For every design file returned by ``utility.list_files(args.s)`` and every
    target in that design, training code is emitted unless the model file
    already exists and ``-skipM`` is "yes", and prediction code is emitted
    unless the prediction file already exists and ``-skipP`` is not "no".

    NOTE(review): this block was recovered from whitespace-collapsed source;
    the nesting of the statements flagged below is the most plausible reading
    and should be confirmed against the original file.
    """
    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-s', required=True,help='Location of the folder containing all the sub experiments')
    parser.add_argument('-targetClass',required=True,help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM',required=False,help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-skipP',required=False,help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-pd', required=True,help='Prediction directory')
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    args = parser.parse_args()
    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    # Both skip flags default to "yes": existing model / prediction files are reused.
    if args.skipM == None:
        args.skipM = "yes"
    if args.skipP == None:
        args.skipP = "yes"
    print "Using the experiment folder " + args.e
    print "Training files steps"
    config = ConfigObj(args.e+"/design.ini")
    print "The config parameters that I am working with are"
    print config
    # Predictions go under the '/wf/' mirror of the '/ro/' prediction directory,
    # in a sub-folder named after the last component of dirname(args.e).
    predictionDataDirectoryName = args.pd.replace('/ro/','/wf/')
    predictionDataDirectoryName = predictionDataDirectoryName + "/p/" + os.path.basename(os.path.dirname(args.e))
    # NOTE(review): os.mkdir creates a single level only, so the parent ".../p"
    # directory must already exist.
    if not os.path.exists(predictionDataDirectoryName):
        os.mkdir(predictionDataDirectoryName)
    # NOTE: os.path.dirname() drops the last path component unless args.e ends with '/'.
    dirName=os.path.dirname(args.e)
    algo = rCodeGen.getAlgoName(args)
    # Force a trailing slash so os.path.dirname(args.s) below yields the folder itself.
    args.s = args.s + "/"
    rProgName = "trainPredict"+ algo + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt +\
        "-pd." + os.path.basename(os.path.abspath(args.pd)) + "-wt." + args.wt+ attribute.generateExtension() + "-For"+os.path.basename(os.path.dirname(args.s))+"SubE.r"
    rProgLocation = dirName+'/'+rProgName
    rScript = open(rProgLocation,'w')
    rScript.write('#!/usr/bin/Rscript \n')
    # Load the R package that matches the chosen algorithm.
    if(algo == 'glmnet'):
        rScript.write('require (glmnet) \n')
    elif(algo == 'randomForest'):
        rScript.write('require (randomForest) \n')
    elif(algo == 'mda'):
        rScript.write('require (mda) \n')
    rCodeGen.ForSetUpChecksForTrainPredictTogather(rScript)
    rCodeGen.ToReadTargetFile(rScript,config)
    rCodeGen.ForWtVectorGeneration(rScript,args.wt.lower())
    # Training-side feature files for every target of the main design.
    for target in config['target']:
        rCodeGen.ToReadFeatureFiles(rScript,config,target,2)
        rCodeGen.ForSanityChecks(rScript,config,target)
    print "For prediction data set"
    # A second copy of the design whose feature keys get a "P" suffix.
    # presumably this keeps prediction variables apart from training ones in the R script - TODO confirm
    configForPredictions = ConfigObj(args.e+"/design.ini")
    print "The config parameters that I am working with are"
    for target in configForPredictions['target']:
        feature_keys = configForPredictions['features-'+target].keys()
        features = configForPredictions['features-'+target]
        for key in feature_keys:
            new_key = key + "P"
            features[new_key] = features[key]
            del features[key]
        # NOTE(review): the next three statements are placed inside the per-target
        # loop by analogy with the training-side loop above - confirm the nesting.
        print configForPredictions
        rCodeGen.ToReadFeatureFiles(rScript,configForPredictions,target,4)
        rCodeGen.ForSanityChecks(rScript,configForPredictions,target)
    designFiles = utility.list_files(args.s)
    for designFile in designFiles:
        print "Generating r code for " + designFile
        rScript.write('\n\nprint ("Running r code for ' + designFile + '")\n')
        config = ConfigObj(designFile)
        configForPredictions = ConfigObj(designFile)
        #--------------MODEL--------------------
        for target in config['target']:
            # Same "P"-suffix renaming as above, applied to this sub-experiment's design.
            feature_keys = configForPredictions['features-'+target].keys()
            features = configForPredictions['features-'+target]
            for key in feature_keys:
                new_key = key + "P"
                features[new_key] = features[key]
                del features[key]
            lModelGeneratedAfterTraining = os.path.dirname(designFile) + '/' + algo + target + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
                '-dt.' + args.dt + '-targetClass.' + args.targetClass + "-wt." + args.wt+ attribute.generateExtension() + '.model'
            if os.path.isfile(lModelGeneratedAfterTraining)and ( args.skipM.lower() == "yes" ):
                print "Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No"
            else:
                rCodeGen.ToCreateDataFrameForTraining(rScript,config,target)
                rCodeGen.ForTraining(rScript,args,config,target)
                rCodeGen.saveTrainingModel(rScript,args,os.path.dirname(designFile),target)
            #--------------Prediction Part--------------------
            # NOTE(review): kept inside the per-target loop because it uses `target` - confirm the nesting.
            predictionFileName = predictionDataDirectoryName + "/" + args.a + target + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
                '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + os.path.basename(os.path.dirname(designFile)) +\
                "-wt." + args.wt+ attribute.generateExtension() +".predictions"
            if not os.path.isfile(predictionFileName) or ( args.skipP.lower() == "no" ):
                rCodeGen.ForPredictions(rScript,configForPredictions,args,designFile,target,4)
            else:
                print "Prediction File " + predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no "
    # The generated R script ends by clearing its workspace.
    rScript.write('rm(list=ls())')
    rScript.close()
    print "Finished generating R training program: " + rProgLocation
    # Make the generated script directly executable (it starts with an Rscript shebang).
    os.system("chmod +x "+rProgLocation)
for target in ['buy','sell']: lTreeFileName = "/home/vikas/ml/ob/e/nsecur/ABAll_AmBRAmBAll//s/2c/AmBRAmB//glmnet" + target + "-td.20140821-tTD30-dt.10.tree1"#args.e+"/"+args.a+ target+'-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + args.dt + attribute.generateExtension() +".tree" + args.treeType dd.gGlobalTree[target],lVariable = reading_tree.reading_tree(lTreeFileName,args.treeType) dd.gTreeVariablesPresent = dd.gTreeVariablesPresent + lVariable for entry,exit in zip(lEntryClList,lExitClList): if entry not in dd.gFinalCondition[target]: dd.gFinalCondition[target][entry] = '' dd.gFinalCondition[target][entry] = reading_tree.traverse_tree(1,args.treeType,float("."+entry),dd.gGlobalTree[target],dd.gFinalCondition[target][entry]) print("Calling tree traversal ",dd.gFinalCondition[target][entry]) if exit not in dd.gFinalCondition[target]: dd.gFinalCondition[target][exit] = '' dd.gFinalCondition[target][exit] = reading_tree.traverse_tree(1,args.treeType,float("."+exit),dd.gGlobalTree[target],dd.gFinalCondition[target][exit]) print("Calling tree traversal " ,dd.gFinalCondition[target][exit]) else: for target in ['buy','sell']: lTreeFileName = args.e+"/"+args.a+ target+'-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + args.dt + attribute.generateExtension() +".tree" + args.treeType dd.gGlobalTree[target],lVariable = reading_tree.reading_tree(lTreeFileName,args.treeType) dd.gTreeVariablesPresent = dd.gTreeVariablesPresent + lVariable nodes = args.nodes.split(";") dd.gFinalCondition[target]['nodes'] = reading_tree.traverse_nodes(args.treeType,nodes,dd.gGlobalTree[target]) config = ConfigObj(args.e+"/design1.ini") for variable in dd.gTreeVariablesPresent: if variable.lower()=="j": predictedBuyValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'buy' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + \ args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." 
+ args.wt+ attribute.generateExtension() + ".predictions" dd.gFileObjectsOfVariablesPresent.append(open(predictedBuyValuesFileName,'r')) elif variable.lower()=="k": predictedSellValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'sell' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' +\ args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions"
pathAfterE = absPathOfExperimentName[absPathOfExperimentName.index("/nsecur/")+8:] elif 'nsefut' in absPathOfExperimentName: pathAfterE = absPathOfExperimentName[absPathOfExperimentName.index("/nsefut/")+8:] elif 'nseopt' in absPathOfExperimentName: pathAfterE = absPathOfExperimentName[absPathOfExperimentName.index("/nseopt/")+8:] if "/" in pathAfterE: mainExperimentName = pathAfterE[:pathAfterE.index("/")] else: mainExperimentName = pathAfterE experimentName = os.path.basename(absPathOfExperimentName) gTickSize = int(args.tickSize) gMaxQty = int(args.orderQty) initialFileName ='TradeOnTarget-d.' + os.path.basename(os.path.abspath(args.d))+ attribute.generateExtension() + "-tq." + args.orderQty + attribute.generateExtension() + "-dte.7" g_quantity_adjustment_list_for_sell = {} g_quantity_adjustment_list_for_buy = {} gOpenBuyPrice = 0.0 gCloseSellPrice = 0.0 gOpenSellPrice = 0.0 gCloseBuyPrice = 0.0 def functionToReadTargetFileToDictionary(pTargetValuesFile,pTargetValuesDict,pFileHeader): lNumberOfLinesInTargetValuesFile = 0 for line in pTargetValuesFile: if pFileHeader == True: pFileHeader = False continue
def getPredictedValuesIntoDict(pPredictedValuesDict):
    """Fill ``pPredictedValuesDict`` with the buy and sell predictions per timestamp.

    The first two keys of the ``target`` section of ``args.e + "/design.ini"``
    name the buy and sell prediction files (in that order).  Each file is read
    with ``functionToReadPredictionFileToDictionary`` and the results are merged
    so that ``pPredictedValuesDict[key] == {'buy': ..., 'sell': ...}`` for every
    key found in the buy file.

    pPredictedValuesDict -- dict updated in place; nothing is returned.

    Raises KeyError when a key of the buy file is missing from the sell file,
    and whatever ``open`` raises when a prediction file cannot be opened.
    """
    dirName = args.pd.replace('/ro/','/wf/')
    config = ConfigObj(args.e+"/design.ini")
    # list() keeps the positional indexing below valid whether keys() returns a list or a view.
    lTargetNames = list(config["target"].keys())

    def predictionsFileFor(pTargetName):
        # Builds the same file name for either target; only the target name differs.
        return dirName+"/p/"+mainExperimentName+"/"+args.a + pTargetName + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
            '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt + attribute.generateExtension() + ".predictions"

    #-----------------Buy side -------------------------------------
    lPredictedBuyValuesDict = dict()
    predictedBuyValuesFileName = predictionsFileFor(lTargetNames[0])
    print("Buy Predicted values file : "+ predictedBuyValuesFileName)
    sys.stdout.flush()
    fileHasHeader = True
    # 'with' closes the handle even if the reader raises (it was previously left open).
    with open(predictedBuyValuesFileName) as predictedBuyValuesFile:
        numberOfLinesInBuyPredictedValuesFile = functionToReadPredictionFileToDictionary(predictedBuyValuesFile,lPredictedBuyValuesDict,fileHasHeader)
    print("Finished reading the buy predicted values file")
    print("The number of elements in the buy predicted values dictionary is : " + str(len(lPredictedBuyValuesDict)))
    if (numberOfLinesInBuyPredictedValuesFile != len(lPredictedBuyValuesDict)):
        # Reported only; duplicates are not treated as fatal here.
        print("Number of duplicate time stamps rejected in buy predicted values dictionary = " + str(numberOfLinesInBuyPredictedValuesFile - len(lPredictedBuyValuesDict)))
    sys.stdout.flush()

    #-----------------Sell side -------------------------------------
    lPredictedSellValuesDict = dict()
    predictedSellValuesFileName = predictionsFileFor(lTargetNames[1])
    print("Sell Predicted values file : "+ predictedSellValuesFileName)
    sys.stdout.flush()
    fileHasHeader = True
    with open(predictedSellValuesFileName) as predictedSellValuesFile:
        numberOfLinesInSellPredictedValuesFile = functionToReadPredictionFileToDictionary(predictedSellValuesFile,lPredictedSellValuesDict,fileHasHeader)
    print("Finished reading the sell predicted values file")
    print("The number of elements in the sell predicted values dictionary is : " + str(len(lPredictedSellValuesDict)))
    if (numberOfLinesInSellPredictedValuesFile != len(lPredictedSellValuesDict)):
        print("Number of duplicate timestamps rejected in sell predicted values dictionary = " + str(numberOfLinesInSellPredictedValuesFile - len(lPredictedSellValuesDict)))
    sys.stdout.flush()

    #-----------------Getting predicted values into dictionary -------------------------------------
    for elements in lPredictedBuyValuesDict:
        pPredictedValuesDict[elements] = {
            'buy': lPredictedBuyValuesDict[elements],
            'sell': lPredictedSellValuesDict[elements],
        }
if "/" in pathAfterE: mainExperimentName = pathAfterE[:pathAfterE.index("/")] else: mainExperimentName = pathAfterE experimentName = os.path.basename(absPathOfExperimentName) gTickSize = int(args.tickSize) gMaxQty = int(args.orderQty) startTimeList = args.startTime.split(";") endTimeList = args.endTime.split(";") initialFileName = [] for indexOfCL in range(0, len(startTimeList)): lInitialFileName = 'DummyTradeEngine-d.' + os.path.basename( os.path.abspath(args.d) ) + attribute.generateExtension( ) + '-l.' + startTimeList[indexOfCL] + "-" + endTimeList[ indexOfCL] + "-tq." + args.orderQty + "-tarType" + args.targetType + attribute.generateExtension( ) + "-dte.7" initialFileName.append(lInitialFileName) g_quantity_adjustment_list_for_sell = {} g_quantity_adjustment_list_for_buy = {} def functionToReadTargetFileToDictionary(pTargetValuesFile, pTargetValuesDict, pFileHeader): lNumberOfLinesInTargetValuesFile = 0 for line in pTargetValuesFile: if pFileHeader == True: pFileHeader = False continue
def getPredictedValuesIntoDict(pPredictedValuesDict):
    """Read the prediction file of the first configured target into a dict.

    The file name is built from ``args``, ``mainExperimentName``,
    ``experimentName`` and the first key of the ``target`` section of
    ``args.e + "/design.ini"``.  The first line is skipped as a header.  Every
    other line is split on the first two commas: field 1 is parsed as the float
    timestamp (the dict key) and the remainder as the predicted probability
    (the value).  A probability that is missing or not a number is stored as 0.

    pPredictedValuesDict -- dict updated in place; nothing is returned.

    If the number of data lines differs from the final size of the dict
    (duplicate timestamps), the process is terminated with ``os._exit(-1)``.
    """
    dirName = args.pd.replace('/ro/','/wf/')
    config = ConfigObj(args.e+"/design.ini")
    target = config["target"]
    # list() keeps the positional indexing valid whether keys() returns a list or a view.
    lFirstTargetName = list(target.keys())[0]
    predictedValuesFileName = dirName+"/p/"+mainExperimentName+"/"+args.a + lFirstTargetName + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
        '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions"
    print("Predicted values file : "+ predictedValuesFileName)
    sys.stdout.flush()
    numberOfLinesInPredictedValuesFile = 0
    # 'with' closes the handle on every path (it was previously never closed).
    with open(predictedValuesFileName) as predictedValuesFile:
        next(predictedValuesFile, None)  # skip the header line, if any
        for line in predictedValuesFile:
            splitLine = line.rstrip('\n').split(',',2)
            timeStamp = float(splitLine[1])
            try:#TODO: remove this and then run the code to identify errors.
                predictedProb = float(splitLine[2])
            except (ValueError, IndexError):
                # Only malformed or missing probabilities fall back to 0; anything
                # else (e.g. KeyboardInterrupt) is no longer swallowed.
                predictedProb = 0
            pPredictedValuesDict[timeStamp] = predictedProb
            numberOfLinesInPredictedValuesFile += 1
    print("Finished reading the predicted values file")
    print("The number of elements in the predicted values dictionary is : " + str(len(pPredictedValuesDict)))
    if (numberOfLinesInPredictedValuesFile != len(pPredictedValuesDict)):
        print("Number of duplicate timestamps rejected = " + str(numberOfLinesInPredictedValuesFile - len(pPredictedValuesDict)))
        # os._exit() does not flush stdio buffers, so flush first or the
        # diagnostic above can be lost when stdout is buffered.
        sys.stdout.flush()
        os._exit(-1)
    sys.stdout.flush()
else: mainExperimentName = pathAfterE mainExperimentNameList.append(mainExperimentName) experimentName = os.path.basename(absPathOfExperimentName) sys.path.append("./src/") sys.path.append("./ob/generators/") config = ConfigObj(experiment+"/design.ini") featureTargetFilePath = args.pd.replace('ro', 'wf') for feature in config["features-buy"]: lName = config["features-buy"][feature].replace('(','').replace(')','') if lName not in featureNames: lFeatureFile = featureTargetFilePath + "/f/" + lName+ attribute.generateExtension() + ".feature" featureFP = open(lFeatureFile, "rb") featureFpList.append(featureFP) featureNames.append(lName) for feature in config["features-sell"]: lName = config["features-sell"][feature].replace('(','').replace(')','') if lName not in featureNames: lFeatureFile = featureTargetFilePath + "/f/" + lName + attribute.generateExtension() + ".feature" featureFP = open(lFeatureFile, "rb") featureFpList.append(featureFP) featureNames.append(lName) dirName = args.pd.replace('/ro/','/wf/') targetSet = config['target']
args.dt, "-iT", args.iT, "-oT", args.oT, "-sP", args.sP ], args.run, args.sequence) if args.sequence == "dp": print dp.printGroupStatus() #========Running the correlation R program========================= allWorkingFileDirectories = attribute.getListOfTrainingDirectoriesNames( int(args.nDays), args.td.replace('/ro/', '/wf/'), args.iT) allWorkingFileDirectoriesString = ";".join(allWorkingFileDirectories) lCorrCommandList = [] if args.sequence == "dp": for l_training_day in allWorkingFileDirectories: lDate = os.path.basename(os.path.abspath(l_training_day)) lFileName = l_exp_dir + "/corr-date-" + lDate + "-td." + os.path.basename( os.path.abspath(args.td) ) + "-dt." + args.dt + attribute.generateExtension() + ".r" lCorrCommandList.append([lFileName, '-d', l_training_day]) utility.runListOfCommandsWithMaxUtlilizationOfWorkers( lCorrCommandList, args, "Day-wise Correlation", int(args.nComputers)) else: def scriptWrapperForDayWiseCorrelation(pTrainingDay): lDate = os.path.basename(os.path.abspath(pTrainingDay)) lFileName = l_exp_dir + "/corr-date-" + lDate + "-td." + os.path.basename( os.path.abspath(args.td) ) + "-dt." + args.dt + attribute.generateExtension() + ".r" utility.runCommand([lFileName, '-d', pTrainingDay], args.run, args.sequence) results = map(scriptWrapperForDayWiseCorrelation, allWorkingFileDirectories)
def scriptWrapperForDayWiseCorrelation(pTrainingDay):
    """Run the day-wise correlation R script that belongs to ``pTrainingDay``.

    The script path is assembled from ``l_exp_dir``, the last path component of
    ``pTrainingDay``, the last path component of ``args.td``, ``args.dt`` and
    ``attribute.generateExtension()``.  It is handed to ``utility.runCommand``
    with ``-d pTrainingDay``.
    """
    lDayLabel = os.path.basename(os.path.abspath(pTrainingDay))
    lTrainedOnLabel = os.path.basename(os.path.abspath(args.td))
    lScriptPath = "".join([
        l_exp_dir,
        "/corr-date-", lDayLabel,
        "-td.", lTrainedOnLabel,
        "-dt.", args.dt,
        attribute.generateExtension(),
        ".r",
    ])
    lCommand = [lScriptPath, '-d', pTrainingDay]
    utility.runCommand(lCommand, args.run, args.sequence)
def main():
    """Generate the executable R prediction script for one experiment.

    Command-line options are parsed with argparse.  The script is written to
    ``dirname(args.s)`` as ``predict<algo>-td.<td>-dt.<dt>-pd.<pd>-wt.<wt><ext>[double].r``
    and its contents are emitted through the ``rCodeGen`` helpers.  Prediction
    code is emitted for a target only when its ``.predictions`` file is missing
    or ``-skipP`` is "no".  When every target already has its file and
    ``-skipP`` is "yes", only the script header and set-up checks are written.

    Changes compared with the previous version:
      * the output directory is created with makedirs and tolerates another
        worker creating it at the same time;
      * the script file is closed even when code generation raises;
      * the script is made executable without going through a shell;
      * the logged model file name now includes attribute.generateExtension(),
        matching the name used by rCodeGen.saveTrainingModel.
    Single-argument parenthesised prints behave the same as a Python 2 print
    statement and as a Python 3 call.
    """
    parser = argparse.ArgumentParser(description='Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/')
    parser.add_argument('-e', required=True,help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-pd', required=True,help='Prediction directory')
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="exp/default")
    parser.add_argument('-targetClass',required=True,help="For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipP',required=False,help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-s',required=False,help="Experiment sub folders")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    parser.add_argument('-double',required=False,help='Double training of in model')
    args = parser.parse_args()
    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    # Defaults: reuse existing prediction files; sub-experiment folder = experiment folder.
    if args.skipP is None:
        args.skipP = "yes"
    if args.s is None:
        args.s = args.e
    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.s+"/design.ini")
    print("The config parameters that I am working with are")
    print(config)
    # NOTE: os.path.dirname() drops the last path component unless args.s ends with '/'.
    dirName = os.path.dirname(args.s)
    algo = 'glmnet' if args.a is None else args.a
    # Every artefact of a "double" run carries this tag right before its extension.
    lDoubleTag = "double" if args.double else ""
    lTrainedOn = os.path.basename(os.path.abspath(args.td))

    rProgName = "predict" + algo + "-td." + lTrainedOn + "-dt." + args.dt + "-pd." + os.path.basename(os.path.abspath(args.pd)) \
        + "-wt." + args.wt + attribute.generateExtension() + lDoubleTag + ".r"
    rProgLocation = dirName+'/'+rProgName

    predictDataDirectoryName = args.pd.replace('/ro/','/wf/')
    predictDataDirectoryName = predictDataDirectoryName + "/p/" + os.path.basename(os.path.dirname(args.e)) + "/"

    def predictionFileFor(pTarget):
        # Single place that knows how a prediction file is named.
        return predictDataDirectoryName + "/" + args.a + pTarget +'-td.' + lTrainedOn \
            + '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + os.path.basename(os.path.dirname(args.s)) + \
            "-wt." + args.wt + attribute.generateExtension() + lDoubleTag + ".predictions"

    # 'with' guarantees the script is closed even if a generator step raises.
    with open(rProgLocation,'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if not os.path.exists(predictDataDirectoryName):
            try:
                os.makedirs(predictDataDirectoryName)
            except OSError:
                # Another process may have created the directory between the
                # check and the call; only a real failure is re-raised.
                if not os.path.isdir(predictDataDirectoryName):
                    raise
        if args.a == 'glmnet':
            rScript.write('require (glmnet) \n')
        elif args.a == 'randomForest':
            rScript.write('require (randomForest) \n')
        rCodeGen.ForSetUpChecks(rScript)

        # Nothing more to generate when every target's file exists and skipping is on.
        lAllFilePresent = all(
            os.path.isfile(predictionFileFor(target)) and args.skipP.lower() == "yes"
            for target in config['target']
        )
        if not lAllFilePresent:
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript,config,target)
                rCodeGen.ForSanityChecks(rScript,config,target)
                predictionFileName = predictionFileFor(target)
                if not os.path.isfile(predictionFileName) or args.skipP.lower() == "no":
                    # Logged for information only; the name mirrors rCodeGen.saveTrainingModel.
                    lModelGeneratedAfterTraining = args.s + '/' + args.a + target + '-td.' + lTrainedOn + '-dt.' + args.dt + \
                        '-targetClass.' + args.targetClass + "-wt." + args.wt + attribute.generateExtension() + lDoubleTag + '.model'
                    if args.double:
                        rCodeGen.ForPredictions(rScript,config,args,args.s,target,2,"double")
                    else:
                        rCodeGen.ForPredictions(rScript,config,args,args.s,target)
                    print(lModelGeneratedAfterTraining)
                else:
                    print(predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")

    print("Finished generating R prediction program: " + rProgLocation)
    # Same effect as `chmod +x` on the freshly written file, without building a
    # shell command: execute permission is granted wherever read is granted.
    lMode = os.stat(rProgLocation).st_mode
    os.chmod(rProgLocation, lMode | ((lMode & 0o444) >> 2))
def scriptWrapperForPredictProgramRun(predictionDirAfterLastTD):
    """Run the generated prediction R script against one prediction directory.

    The script path is assembled from ``lExperimentFolderName``, ``args.a``,
    the last path components of ``args.td`` and ``predictionDirAfterLastTD``,
    ``args.dt``, ``args.wt`` and ``attribute.generateExtension()``.  It is
    handed to ``utility.runCommand`` with ``-d`` set to the directory whose
    '/ro/' segment has been replaced by '/wf/'.
    """
    lTrainedOnLabel = os.path.basename(os.path.abspath(args.td))
    lPredictOnLabel = os.path.basename(os.path.abspath(predictionDirAfterLastTD))
    lScriptPath = "".join([
        lExperimentFolderName,
        "/predict", args.a,
        "-td.", lTrainedOnLabel,
        "-dt.", args.dt,
        "-pd.", lPredictOnLabel,
        "-wt.", args.wt,
        attribute.generateExtension(),
        ".r",
    ])
    lWorkingDir = predictionDirAfterLastTD.replace('/ro/', '/wf/')
    utility.runCommand([lScriptPath, "-d", lWorkingDir], args.run, args.sequence)
def main():
    """Generate one executable correlation R script per training day.

    Command-line options are parsed with argparse.  For every directory
    returned by ``attribute.getListOfTrainingDirectoriesNames(int(args.dt),
    args.td, args.iT)`` a script named ``corr-date-<day>-td.<td>-dt.<dt><ext>.r``
    is written under ``dirname(args.e)``.  Its contents are emitted through
    ``rCodeGen``: set-up checks, target-file reading, and for every configured
    target a correlation step that is given the matching
    ``correlation-coef-date-...coef`` file name.

    Changes compared with the previous version: each script file is closed even
    when code generation raises, and the script is made executable without
    going through a shell.  Single-argument parenthesised prints behave the
    same as a Python 2 print statement and as a Python 3 call.
    """
    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is :- src/corrRGenForE.py -e ob/e/nsefut/CorExpHINDALCO/ -td ob/data/ro/nsefut/20141017/ -dt 10 -iT HINDALCO -sP -1 -oT 0')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    args = parser.parse_args()
    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    print("Using the experiment folder " + args.e)
    print(args.e+"/design.ini")
    config = ConfigObj(args.e+"/design.ini")
    print("The config parameters that I am working with are")
    print(config)
    # NOTE: os.path.dirname() drops the last path component unless args.e ends with '/'.
    dirName = os.path.dirname(args.e)+"/"
    lTrainedOn = os.path.basename(os.path.abspath(args.td))
    trainingDaysDirectory = attribute.getListOfTrainingDirectoriesNames( int(args.dt) , args.td ,args.iT)
    for l_trainingday in trainingDaysDirectory:
        lDay = os.path.basename(os.path.abspath(l_trainingday))
        # Script and coefficient file share everything after the date.
        lCommonSuffix = lTrainedOn + "-dt." + args.dt + attribute.generateExtension()
        rProgName = "corr-date-"+ lDay +"-td." + lCommonSuffix +".r"
        rProgLocation = dirName+'/'+rProgName
        lCorrelationFileName = dirName + '/correlation-coef-date-'+ lDay + '-td.' + lCommonSuffix + ".coef"
        # 'with' guarantees the script is closed even if a generator step raises.
        with open(rProgLocation,'w') as rScript:
            rScript.write('#!/usr/bin/Rscript \n')
            rCodeGen.ForSetUpChecks(rScript)
            rCodeGen.ToReadTargetFile(rScript,config)
            for target in config['target']:
                rCodeGen.ToFindCorrelationDatewiseAndPrintingToFile(rScript,config,target,lCorrelationFileName)
        # Same effect as `chmod +x` on the freshly written file, without building a
        # shell command: execute permission is granted wherever read is granted.
        lMode = os.stat(rProgLocation).st_mode
        os.chmod(rProgLocation, lMode | ((lMode & 0o444) >> 2))
def getPredictedValuesIntoDict(pPredictedValuesDict):
    """Merge the buy and sell prediction files into ``pPredictedValuesDict``.

    The ``target`` section of ``args.e + "/design.ini"`` supplies the two
    target names: the first key is read as the buy file and the second as the
    sell file.  Both files are parsed by ``functionToReadPredictionFileToDictionary``
    (its return value is compared with the dict size to report duplicates).
    For every key of the buy dict the result holds
    ``{'buy': <buy value>, 'sell': <sell value>}``.

    pPredictedValuesDict -- dict updated in place; nothing is returned.

    Raises KeyError when a key of the buy file is missing from the sell file,
    and whatever ``open`` raises when a prediction file cannot be opened.
    """
    dirName = args.pd.replace('/ro/','/wf/')
    config = ConfigObj(args.e+"/design.ini")
    # list() keeps the positional indexing below valid whether keys() returns a list or a view.
    lTargetNames = list(config["target"].keys())
    lFilePrefix = dirName+"/p/"+mainExperimentName+"/"+args.a

    def fileNameFor(pTargetName):
        # Buy and sell files differ only in the target name embedded after the prefix.
        return lFilePrefix + pTargetName + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
            '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt + attribute.generateExtension() + ".predictions"

    # ---- buy predictions ----
    lPredictedBuyValuesDict = dict()
    predictedBuyValuesFileName = fileNameFor(lTargetNames[0])
    print("Buy Predicted values file : "+ predictedBuyValuesFileName)
    sys.stdout.flush()
    # The handle used to stay open for the life of the process; 'with' closes it.
    with open(predictedBuyValuesFileName) as predictedBuyValuesFile:
        numberOfLinesInBuyPredictedValuesFile = functionToReadPredictionFileToDictionary(predictedBuyValuesFile,lPredictedBuyValuesDict,True)
    print("Finished reading the buy predicted values file")
    print("The number of elements in the buy predicted values dictionary is : " + str(len(lPredictedBuyValuesDict)))
    lBuyDuplicates = numberOfLinesInBuyPredictedValuesFile - len(lPredictedBuyValuesDict)
    if lBuyDuplicates != 0:
        # Reported only; duplicates are not treated as fatal here.
        print("Number of duplicate time stamps rejected in buy predicted values dictionary = " + str(lBuyDuplicates))
    sys.stdout.flush()

    # ---- sell predictions ----
    lPredictedSellValuesDict = dict()
    predictedSellValuesFileName = fileNameFor(lTargetNames[1])
    print("Sell Predicted values file : "+ predictedSellValuesFileName)
    sys.stdout.flush()
    with open(predictedSellValuesFileName) as predictedSellValuesFile:
        numberOfLinesInSellPredictedValuesFile = functionToReadPredictionFileToDictionary(predictedSellValuesFile,lPredictedSellValuesDict,True)
    print("Finished reading the sell predicted values file")
    print("The number of elements in the sell predicted values dictionary is : " + str(len(lPredictedSellValuesDict)))
    lSellDuplicates = numberOfLinesInSellPredictedValuesFile - len(lPredictedSellValuesDict)
    if lSellDuplicates != 0:
        print("Number of duplicate timestamps rejected in sell predicted values dictionary = " + str(lSellDuplicates))
    sys.stdout.flush()

    #-----------------Getting predicted values into dictionary -------------------------------------
    for elements in lPredictedBuyValuesDict:
        lBothSides = {}
        lBothSides['buy'] = lPredictedBuyValuesDict[elements]
        lBothSides['sell'] = lPredictedSellValuesDict[elements]
        pPredictedValuesDict[elements] = lBothSides
pLPerLotLong=(averageCloseSellPrice - averageOpenBuyPrice)* 1000 print("1 lot has 1000 qty's so P/L Short per lot is: " + str(pLPerLotShort), file = outputFile) print("1 lot has 1000 qty's so P/L Long per lot is: " + str(pLPerLotLong), file = outputFile) print("P/L for Short trading 10 lots is: " + str(pLPerLotShort * 10), file = outputFile) print("P/L for Long trading 10 lots is: " + str(pLPerLotLong * 10), file = outputFile) if __name__ == "__main__": tStart = datetime.now() dirName = args.pd.replace('/ro/','/rs/') checkAllFilesAreExistOrNot = 'false' lWFDirName = args.pd.replace('/ro/','/wf/') if args.double: predictedBuyValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'buy' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + \ args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + "double.predictions" predictedSellValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'sell' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' +\ args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + "double.predictions" else: predictedBuyValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'buy' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + \ args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions" predictedSellValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'sell' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' +\ args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions" lEntryClList = args.entryCL.split(";") lExitClList = args.exitCL.split(";") if len(lEntryClList)!= len(lExitClList): print("Len of entry and exit list does match. 
Entry List length = " , len(lEntryClList) , " and ExitCL List length = " , len(lExitClList)) os._exit(-1)
def main():
    """Generate the executable R training script for one experiment folder.

    Parses the command line, loads ``<e>/design.ini`` with ConfigObj and
    writes ``train<algo>-td.<day>-dt.<n>-wt.<wt><ext>[double].r`` next to the
    experiment folder.  For every key in ``config['target']`` the expected
    model file name is computed; R code for a target is only emitted when
    its model file is missing or ``-skipM`` is not "yes".

    Command-line notes:
        -skipM   defaults to "yes" when omitted.
        -double  is a plain string option, so ANY non-empty value (even
                 "no") switches the generator to double-training mode.

    The generated script is finally made executable with ``chmod +x``.
    """
    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True, help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-targetClass', required=True, help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM', required=False, help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-double', required=False, help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipM is None:
        args.skipM = "yes"
    skipExisting = args.skipM.lower() == "yes"

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"
    algo = rCodeGen.getAlgoName(args)
    trainingDay = os.path.basename(os.path.abspath(args.td))

    def modelPath(target, suffix):
        # One place for the model-file naming scheme; it has to stay in sync
        # with the name rCodeGen.saveTrainingModel writes into the R script.
        return (dirName + '/' + algo + target + '-td.' + trainingDay
                + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                + "-wt." + args.wt + attribute.generateExtension() + suffix)

    finalModelSuffix = 'double.model' if args.double else '.model'
    rProgName = ("train" + algo + "-td." + trainingDay + "-dt." + args.dt
                 + "-wt." + args.wt + attribute.generateExtension()
                 + ("double.r" if args.double else ".r"))
    rProgLocation = dirName + '/' + rProgName

    # 'with' guarantees the script is flushed and closed even when one of
    # the rCodeGen emitters raises half-way through.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if algo in ('glmnet', 'randomForest', 'mda'):
            rScript.write('require (' + algo + ') \n')
        rCodeGen.ForSetUpChecks(rScript)

        # Nothing beyond the header is emitted when every target already has
        # its final model file and regeneration was not requested.
        lAllFilePresent = all(
            os.path.isfile(modelPath(target, finalModelSuffix)) and skipExisting
            for target in config['target'])

        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript, config)
            rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ForSanityChecks(rScript, config, target)

                lModelGeneratedAfterTraining = modelPath(target, finalModelSuffix)
                if os.path.isfile(lModelGeneratedAfterTraining) and skipExisting:
                    print("Model File " + lModelGeneratedAfterTraining
                          + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                    continue

                rCodeGen.ToCreateDataFrameForTraining(rScript, config, target)
                if args.double:
                    # Reuse the single-pass model when it is already on disk,
                    # otherwise train it first; the double model is then
                    # derived from it.
                    if os.path.isfile(modelPath(target, '.model')):
                        rCodeGen.ForLoadingModel(rScript, args, dirName, target, config)
                    else:
                        rCodeGen.ForTraining(rScript, args, config, target)
                    rCodeGen.forPreparingWtVectorForDoubleTraining(rScript, args, target)
                    rCodeGen.saveTrainingModel(rScript, args, dirName, target, "double")
                else:
                    rCodeGen.ForTraining(rScript, args, config, target)
                    rCodeGen.saveTrainingModel(rScript, args, dirName, target)

    print("Finished generating R training program: " + rProgLocation)
    # NOTE(review): shell-string chmod breaks on paths containing spaces or
    # shell metacharacters; kept as-is to preserve umask-aware '+x' semantics.
    os.system("chmod +x " + rProgLocation)
index("/nsefut/") + 8:] elif 'nseopt' in absPathOfExperimentName: pathAfterE = absPathOfExperimentName[absPathOfExperimentName. index("/nseopt/") + 8:] if "/" in pathAfterE: mainExperimentName = pathAfterE[:pathAfterE.index("/")] else: mainExperimentName = pathAfterE experimentName = os.path.basename(absPathOfExperimentName) gTickSize = int(args.tickSize) gMaxQty = int(args.orderQty) initialFileName = 'TradeOnTarget-d.' + os.path.basename(os.path.abspath( args.d)) + attribute.generateExtension( ) + "-tq." + args.orderQty + attribute.generateExtension() + "-dte.7" g_quantity_adjustment_list_for_sell = {} g_quantity_adjustment_list_for_buy = {} gOpenBuyPrice = 0.0 gCloseSellPrice = 0.0 gOpenSellPrice = 0.0 gCloseBuyPrice = 0.0 def functionToReadTargetFileToDictionary(pTargetValuesFile, pTargetValuesDict, pFileHeader): lNumberOfLinesInTargetValuesFile = 0 for line in pTargetValuesFile: if pFileHeader == True: pFileHeader = False
print("1 lot has 1000 qty's so P/L Long per lot is: " + str(pLPerLotLong), file=outputFile) print("P/L for Short trading 10 lots is: " + str(pLPerLotShort * 10), file=outputFile) print("P/L for Long trading 10 lots is: " + str(pLPerLotLong * 10), file=outputFile) if __name__ == "__main__": tStart = datetime.now() dirName = args.pd.replace('/ro/', '/rs/') checkAllFilesAreExistOrNot = 'false' lWFDirName = args.pd.replace('/ro/', '/wf/') predictedBuyValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'buy' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + \ args.dt + '-targetClass.' + args.targetClass + '-f.' + buyExperimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions" predictedSellValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'sell' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' +\ args.dt + '-targetClass.' + args.targetClass + '-f.' + sellExperimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions" lEntryClList = args.entryCL.split(";") lExitClList = args.exitCL.split(";") if len(lEntryClList) != len(lExitClList): print("Len of entry and exit list does match. Entry List length = ", len(lEntryClList), " and ExitCL List length = ", len(lExitClList)) os._exit(-1) lengthOfList = len(lEntryClList) lMinOfExitCl = 9999.000 fileNameList = []
for designFile in designFiles: lExperimentFolderName = os.path.dirname(designFile) + "/" experimentFolderDirectory.append(lExperimentFolderName) indexOfFeatures += 1 else: experimentFolderDirectory.append(args.e) print "Experiment Folder Lsit ", experimentFolderDirectory #==========Running the model in serial mode and rest thing in serial , lp or dp mode as given for lExperimentFolderName in experimentFolderDirectory: # utility.runCommand(["mRGenForE.py","-e",lExperimentFolderName,"-a",args.a,"-targetClass",args.targetClass,"-skipM",args.skipM,"-td",args.td, "-dt" , \ # args.dt , '-wt' , args.wt,"-iT",args.iT,"-oT",args.oT,"-sP",args.sP ] , args.run , "serial") scriptName = lExperimentFolderName + "/train" + args.a + "-td." + os.path.basename( os.path.abspath(args.td) ) + "-dt." + args.dt + "-wt." + args.wt + attribute.generateExtension( ) + ".r" trainingDataListString = ";".join(trainingDaysDirectory).replace( "/ro/", "/wf/") # utility.runCommand([scriptName,"-d",trainingDataListString] , args.run , "serial") if args.sequence == "dp": #=========Putting all command in alist-======== lRCodeGenCommandList = [] lPGenRCodeList = [] lTradingCommandList = [] for i in range(len(predictionDaysDirectory)): predictionDirAfterLastTD = predictionDaysDirectory[i] lRCodeGenCommandList.append((["pRGenForE.py","-e",args.e,"-s",lExperimentFolderName,"-a",args.a,"-skipP",args.skipP,"-td",args.td , "-pd" , predictionDirAfterLastTD , "-dt" , args.dt ,\ "-targetClass" , args.targetClass , '-wt' , args.wt ,"-iT",args.iT,"-oT",args.oT,"-sP",args.sP])) scriptName=lExperimentFolderName+"/predict" + args.a + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt +"-pd." +\
def main():
    """Generate the executable R prediction script for one experiment.

    Parses the command line, loads ``<s>/design.ini`` with ConfigObj and
    writes ``predict<algo>-td.<day>-dt.<n>-pd.<day>-wt.<wt><ext>[double].r``
    into ``dirname(-s)``.  Prediction files are expected under
    ``<pd with /ro/ replaced by /wf/>/p/<basename(dirname(-e))>/``; that
    directory is created when missing.  Prediction code for a target is
    emitted only when its ``.predictions`` file is absent or ``-skipP`` is
    "no".

    Command-line notes:
        -skipP   defaults to "yes" when omitted.
        -s       defaults to the value of -e when omitted.
        -double  is a plain string option, so ANY non-empty value enables
                 the "double" file names and prediction mode.
    """
    parser = argparse.ArgumentParser(
        description='Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/'
    )
    parser.add_argument('-e', required=True, help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-pd', required=True, help='Prediction directory')
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="exp/default")
    parser.add_argument(
        '-targetClass',
        required=True,
        help="For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)"
    )
    parser.add_argument(
        '-skipP',
        required=False,
        help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No"
    )
    parser.add_argument('-s', required=False, help="Experiment sub folders")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-double', required=False, help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipP is None:
        args.skipP = "yes"
    if args.s is None:
        args.s = args.e

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.s + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.s)
    # -a is a required option, so the 'glmnet' fallback is only a safety net.
    algo = 'glmnet' if args.a is None else args.a
    trainingDay = os.path.basename(os.path.abspath(args.td))

    rProgName = ("predict" + algo + "-td." + trainingDay + "-dt." + args.dt
                 + "-pd." + os.path.basename(os.path.abspath(args.pd))
                 + "-wt." + args.wt + attribute.generateExtension()
                 + ("double.r" if args.double else ".r"))
    rProgLocation = dirName + '/' + rProgName

    predictDataDirectoryName = (args.pd.replace('/ro/', '/wf/') + "/p/"
                                + os.path.basename(os.path.dirname(args.e)) + "/")
    predictionSuffix = "double.predictions" if args.double else ".predictions"

    def predictionPath(target):
        # One place for the prediction-file naming scheme.
        return (predictDataDirectoryName + "/" + args.a + target + '-td.' + trainingDay
                + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                + '-f.' + os.path.basename(os.path.dirname(args.s))
                + "-wt." + args.wt + attribute.generateExtension() + predictionSuffix)

    # 'with' guarantees the script is closed even if directory creation or
    # one of the rCodeGen emitters raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')

        if not os.path.exists(predictDataDirectoryName):
            try:
                # makedirs also creates a missing parent ".../p/" directory.
                os.makedirs(predictDataDirectoryName)
            except OSError:
                # Tolerate the directory appearing between the check and the
                # create (e.g. several generators started at the same time).
                if not os.path.isdir(predictDataDirectoryName):
                    raise

        if args.a == 'glmnet':
            rScript.write('require (glmnet) \n')
        elif args.a == 'randomForest':
            rScript.write('require (randomForest) \n')
        rCodeGen.ForSetUpChecks(rScript)

        # Only the header is emitted when every target's prediction file is
        # already present and regeneration was not requested.
        lAllFilePresent = all(
            os.path.isfile(predictionPath(target)) and args.skipP.lower() == "yes"
            for target in config['target'])

        if not lAllFilePresent:
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ForSanityChecks(rScript, config, target)
                predictionFileName = predictionPath(target)
                if not os.path.isfile(predictionFileName) or (args.skipP.lower() == "no"):
                    # Logged for the operator only.  The instrument extension
                    # is included so the name matches what the training
                    # generator actually saves.
                    lModelGeneratedAfterTraining = (
                        args.s + '/' + args.a + target + '-td.' + trainingDay
                        + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                        + "-wt." + args.wt + attribute.generateExtension()
                        + ('double.model' if args.double else '.model'))
                    if args.double:
                        rCodeGen.ForPredictions(rScript, config, args, args.s, target, 2, "double")
                    else:
                        rCodeGen.ForPredictions(rScript, config, args, args.s, target)
                    print(lModelGeneratedAfterTraining)
                else:
                    print(predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")

    print("Finished generating R prediction program: " + rProgLocation)
    # NOTE(review): shell-string chmod breaks on paths containing spaces or
    # shell metacharacters; kept as-is to preserve umask-aware '+x' semantics.
    os.system("chmod +x " + rProgLocation)
if "/" in pathAfterE: mainExperimentName = pathAfterE[:pathAfterE.index("/")] else: mainExperimentName = pathAfterE experimentName = os.path.basename(absPathOfExperimentName) gTickSize = int(args.tickSize) gMaxQty = int(args.orderQty) totalEntryCL = args.entryCL.split(";") totalExitCL = args.exitCL.split(";") initialFileName = [] for indexOfCL in range(0,len(totalEntryCL)): lInitialFileName = args.a + '-td.' + os.path.basename(os.path.abspath(args.td)) + \ '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + \ '-l.'+totalEntryCL[indexOfCL]+"-"+totalExitCL[indexOfCL] + "-tq." + args.orderQty + "-te.7" initialFileName.append(lInitialFileName) g_quantity_adjustment_list_for_sell = {} g_quantity_adjustment_list_for_buy = {} def functionToReadPredictionFileToDictionary(pPredictedValuesFile,pPredictedValuesDict,pFileHeader): lNumberOfLinesInPredictedValuesFile = 0 for line in pPredictedValuesFile: if pFileHeader == True: pFileHeader = False continue line=line.rstrip('\n') splitLine = line.split(',')
else: mainExperimentName = pathAfterE mainExperimentNameList.append(mainExperimentName) experimentName = os.path.basename(absPathOfExperimentName) sys.path.append("./src/") sys.path.append("./ob/generators/") config = ConfigObj(experiment + "/design.ini") featureTargetFilePath = args.pd.replace('ro', 'wf') for feature in config["features-buy"]: lName = config["features-buy"][feature].replace('(', '').replace(')', '') if lName not in featureNames: lFeatureFile = featureTargetFilePath + "/f/" + lName + attribute.generateExtension( ) + ".feature" featureFP = open(lFeatureFile, "rb") featureFpList.append(featureFP) featureNames.append(lName) for feature in config["features-sell"]: lName = config["features-sell"][feature].replace('(', '').replace(')', '') if lName not in featureNames: lFeatureFile = featureTargetFilePath + "/f/" + lName + attribute.generateExtension( ) + ".feature" featureFP = open(lFeatureFile, "rb") featureFpList.append(featureFP) featureNames.append(lName) dirName = args.pd.replace('/ro/', '/wf/')
def main():
    """Generate one R training script covering every sub-experiment of -s.

    Parses the command line, loads ``<e>/design.ini`` with ConfigObj and
    writes ``train<algo>-td.<day>-dt.<n>-wt.<wt><ext>-For<sub>SubE.r`` into
    ``dirname(-e)``.  Targets and features of the main experiment are read
    once at the top of the generated script; then, for every design file
    returned by ``utility.list_files(-s)``, training and model-saving code is
    emitted for each target whose ``.model`` file is missing (or for all of
    them when ``-skipM`` is not "yes").  The script ends with
    ``rm(list=ls())`` and is made executable.

    -skipM defaults to "yes" when omitted.
    """
    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True, help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-s', required=True, help='Location of the folder containing all the sub experiments')
    parser.add_argument('-targetClass', required=True, help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM', required=False, help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipM is None:
        args.skipM = "yes"
    skipExisting = args.skipM.lower() == "yes"

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Using the experiment folder " + args.e)
    config = ConfigObj(args.e + "/design.ini")
    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e)
    algo = rCodeGen.getAlgoName(args)
    # Trailing slash so that dirname() below yields the folder itself.
    args.s = args.s + "/"
    trainingDay = os.path.basename(os.path.abspath(args.td))

    rProgName = ("train" + algo + "-td." + trainingDay + "-dt." + args.dt
                 + "-wt." + args.wt + attribute.generateExtension()
                 + "-For" + os.path.basename(os.path.dirname(args.s)) + "SubE.r")
    rProgLocation = dirName + '/' + rProgName

    # 'with' guarantees the script is closed even when an emitter raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if algo in ('glmnet', 'randomForest', 'mda'):
            rScript.write('require (' + algo + ') \n')
        rCodeGen.ForSetUpChecks(rScript)

        # Data of the main experiment is loaded once, up front.
        rCodeGen.ToReadTargetFile(rScript, config)
        rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript, config, target)
            rCodeGen.ForSanityChecks(rScript, config, target)

        for designFile in utility.list_files(args.s):
            print("Generating r code for " + designFile)
            rScript.write('\n\nprint ("Running r code for ' + designFile + '")\n')
            subConfig = ConfigObj(designFile)
            subExperimentDir = os.path.dirname(designFile)
            for target in subConfig['target']:
                lModelGeneratedAfterTraining = (
                    subExperimentDir + '/' + algo + target + '-td.' + trainingDay
                    + '-dt.' + args.dt + '-targetClass.' + args.targetClass
                    + "-wt." + args.wt + attribute.generateExtension() + '.model')
                if os.path.isfile(lModelGeneratedAfterTraining) and skipExisting:
                    print("Model File " + lModelGeneratedAfterTraining
                          + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                    continue
                rCodeGen.ToCreateDataFrameForTraining(rScript, subConfig, target)
                rCodeGen.ForTraining(rScript, args, subConfig, target)
                rCodeGen.saveTrainingModel(rScript, args, subExperimentDir, target)

        # Last statement of the generated script: clear the R workspace.
        rScript.write('rm(list=ls())')

    print("Finished generating R training program: " + rProgLocation)
    # NOTE(review): shell-string chmod breaks on paths containing spaces or
    # shell metacharacters; kept as-is to preserve umask-aware '+x' semantics.
    os.system("chmod +x " + rProgLocation)
def ToReadFeatureFiles(rScript,config,targetVariable,pUseWhichArgumentForData=2):
    """Write the "Section3" R code that loads every feature of one target.

    rScript                   -- writable file-like object receiving R source.
    config                    -- mapping with a "features-<targetVariable>"
                                 section (feature key -> feature name).
    targetVariable            -- target key; appended to each feature key to
                                 form the R variable name.
    pUseWhichArgumentForData  -- 4 makes the R code split args[4] into the
                                 directory list; any other value uses args[2].

    For each feature the emitted R loop load()s "<dir>/f/<name><ext>.bin"
    from every directory and rbind()s the pieces into one variable.  The
    module-level string skipRowCode (defined outside this function) is
    appended verbatim after each get(...) expression.
    """
    featureMap = config["features-" + targetVariable]
    argSlot = '4' if pUseWhichArgumentForData == 4 else '2'

    rScript.write('\nprint ("Section3: Read feature files") \n')
    rScript.write('lDirectorySet<-strsplit(args[' + argSlot + '],";",fixed=TRUE,useBytes=FALSE)\n')

    for featureKey in featureMap:
        rawName = featureMap[featureKey]

        # Name of the object stored inside the .bin file: all brackets removed.
        rObjectName = rawName
        for bracket in ('[', ']', '(', ')'):
            rObjectName = rObjectName.replace(bracket, '')

        # File stem keeps square brackets but drops parentheses.
        binStem = rawName.replace('(', '').replace(')', '') + attribute.generateExtension()

        rVariable = featureKey + targetVariable
        loadStatement = ' load(paste(file,"/f/' + binStem + '.bin",sep=""))\n'
        dropLoaded = ' rm("' + rObjectName + '")\n'
        fetchExpression = 'get("' + rObjectName + '")' + skipRowCode

        rLines = [
            'lFlag=FALSE\n',
            'for (file in lDirectorySet[[1]]){\n',
            # first directory: initialise the variable
            ' if (!lFlag){\n',
            loadStatement,
            ' ' + rVariable + '<-' + fetchExpression + ' \n',
            dropLoaded,
            ' lFlag=TRUE\n',
            ' }\n',
            # subsequent directories: append rows
            ' else {\n',
            loadStatement,
            ' temp<-' + fetchExpression + '\n',
            dropLoaded,
            ' ' + rVariable + '<-rbind(' + rVariable + ',temp)\n',
            ' rm(temp)\n',
            ' }\n',
            ' print ("Reading ' + binStem + '.feature") \n',
            '}\n',
        ]
        for rLine in rLines:
            rScript.write(rLine)
def ToFindCorrelationDatewiseAndPrintingToFile(rScript,config,pTargetVariableKey,pFileName):
    """Write the "Section6" R code that dumps per-feature correlation sums.

    rScript            -- writable file-like object receiving R source.
    config             -- mapping with a "features-<pTargetVariableKey>"
                          section (feature key -> feature name).
    pTargetVariableKey -- target key; used both as the name of the R target
                          variable and as suffix of each feature variable.
    pFileName          -- path embedded in the emitted cat(..., append=TRUE)
                          calls, i.e. the file the R script appends to.

    For every feature the emitted R code loads the feature from each
    directory in lDirectorySet, computes sum(x*y), sum(y^2), sum(x^2),
    sum(y), sum(x) and n over column 2 of the target and feature variables,
    and appends them to pFileName as "<feature>_XY=..." style lines.

    The emitted R code references lDirectorySet and the target variable but
    this function does not emit their definitions -- presumably earlier
    sections of the generated script create them (verify against callers).
    skipRowCode is a module-level string defined outside this function.
    """
    features = config["features-"+pTargetVariableKey]
    rScript.write('\nprint ("Section6: To Find Correlation For ' +pTargetVariableKey +'") \n')
    # Header line of the output file for this target.
    rScript.write('string_intercept = paste("CorrelationCoeficient Of ","' + pTargetVariableKey + '" , ":- ","\\n",sep="")\n')
    rScript.write('cat(string_intercept,file="'+ pFileName + '",sep="",append=TRUE)\n')
    for feature in features:
        # Object name inside the .bin file: every bracket character removed.
        userFriendlyName = features[feature]
        userFriendlyName = userFriendlyName.replace('[','')
        userFriendlyName = userFriendlyName.replace(']','')
        userFriendlyName = userFriendlyName.replace('(','')
        userFriendlyName = userFriendlyName.replace(')','')
        # File stem: only parentheses removed, instrument extension appended.
        featureNameWithoutBrackets = features[feature].replace('(','').replace(')','') + attribute.generateExtension()
        # R loop: first directory initialises the variable, later ones rbind.
        rScript.write('lFlag=FALSE\n')
        rScript.write('for (file in lDirectorySet[[1]]){\n')
        rScript.write(' if (!lFlag){\n')
        rScript.write(' load(paste(file,"/f/'+featureNameWithoutBrackets+'.bin",sep=""))\n')
        rScript.write(' '+feature+pTargetVariableKey+'<-get("'+userFriendlyName+'")' + skipRowCode + ' \n')
        rScript.write(' rm("' + userFriendlyName + '")\n')
        rScript.write(' lFlag=TRUE\n')
        rScript.write(' }\n')
        rScript.write(' else {\n')
        rScript.write(' load(paste(file,"/f/'+featureNameWithoutBrackets+'.bin",sep=""))\n')
        rScript.write(' temp<-get("'+userFriendlyName+ '")' + skipRowCode + '\n')
        rScript.write(' rm("' + userFriendlyName + '")\n')
        rScript.write(' '+feature+pTargetVariableKey+'<-rbind('+feature+pTargetVariableKey+',temp)\n')
        rScript.write(' rm(temp)\n')
        rScript.write(' }\n')
        rScript.write(' print ("Reading '+ featureNameWithoutBrackets +'.feature' + '") \n')
        rScript.write('}\n')
        # From here on the raw (bracketed) feature name is used as the label
        # in the output file.
        userFriendlyName = features[feature]
        rScript.write('tempXY <- sum('+pTargetVariableKey+'[,2] * '+ feature+pTargetVariableKey+'[,2] )\n')
        rScript.write('tempY2 <- sum('+pTargetVariableKey+'[,2] ^ 2 )\n')
        rScript.write('tempX2 <- sum('+feature+pTargetVariableKey+'[,2] ^ 2 )\n')
        rScript.write('tempY <- sum('+pTargetVariableKey+'[,2] )\n')
        rScript.write('tempX <- sum('+feature+pTargetVariableKey+'[,2] )\n')
        rScript.write('n <- length('+feature+pTargetVariableKey+'[,2] )\n')
        # The next seven writes together form ONE R paste(...) statement;
        # only the last of them ends the line.
        rScript.write('string_intercept = paste("'+ userFriendlyName +'_XY" ,"=",toString(tempXY),"\\n"')
        rScript.write(',"'+ userFriendlyName +'_Y2" ,"=",toString(tempY2),"\\n"')
        rScript.write(',"'+ userFriendlyName +'_X2" ,"=",toString(tempX2),"\\n"')
        rScript.write(',"'+ userFriendlyName +'_Y" ,"=",toString(tempY),"\\n"')
        rScript.write(',"'+ userFriendlyName +'_X" ,"=",toString(tempX),"\\n"')
        rScript.write(',"'+ userFriendlyName +'_n" ,"=",toString(n),"\\n"')
        rScript.write(',sep="")\n')
        rScript.write('cat(string_intercept,file="'+ pFileName + '",sep="",append=TRUE)\n')
        # Drop the feature variable from the R workspace once it is summarised.
        rScript.write('rm('+ feature+pTargetVariableKey + ')\n')
    # Two newlines appended to the output file.
    # NOTE(review): assumed to run once after the feature loop; the reviewed
    # text had lost its indentation -- confirm against the original file.
    rScript.write('string_intercept = paste("\\n","\\n",sep="")\n')
    rScript.write('cat(string_intercept,file="'+ pFileName + '",sep="",append=TRUE)\n')
else: mainExperimentName = pathAfterE mainExperimentNameList.append(mainExperimentName) experimentName = os.path.basename(absPathOfExperimentName) sys.path.append("./src/") sys.path.append("./ob/generators/") config = ConfigObj(experiment+"/design.ini") featureTargetFilePath = args.pd.replace('ro', 'wf') for feature in config["features-buy"]: lName = config["features-buy"][feature].replace('(','').replace(')','') if lName not in featureNames: lFeatureFile = featureTargetFilePath + "/f/" + lName+ attribute.generateExtension() + ".feature" featureFP = open(lFeatureFile, "rb") featureFpList.append(featureFP) featureNames.append(lName) for feature in config["features-sell"]: lName = config["features-sell"][feature].replace('(','').replace(')','') if lName not in featureNames: lFeatureFile = featureTargetFilePath + "/f/" + lName + attribute.generateExtension() + ".feature" featureFP = open(lFeatureFile, "rb") featureFpList.append(featureFP) featureNames.append(lName) dirName = args.pd.replace('/ro/','/wf/') targetSet = config['target'] # for target in targetSet.keys():