Ejemplo n.º 1
0
def saveTrainingModel(rScript,
                      args,
                      path,
                      pTargetVariableKey,
                      pDouble="",
                      treeOrNot="",
                      treeFileName=""):
    """Emit the R statements that persist the fitted model and its coefficients.

    Writes "Section8" of the generated R script to ``rScript``: an R ``save``
    of the ``fit`` object into a ``.model`` file, followed by R code that
    appends the intercept and the remaining ``lambda.min`` coefficients to a
    ``.coef`` file.

    rScript            -- object with a ``write`` method receiving the R text.
    args               -- parsed arguments; ``td``, ``dt``, ``targetClass`` and
                          ``wt`` are read here (plus whatever getAlgoName uses).
    path               -- directory prefix of both output files.
    pTargetVariableKey -- target key; part of the ``.model`` file name and of
                          the labels written into the ``.coef`` file.
    pDouble            -- when non-empty, 'double' is inserted right before
                          the ``.model`` / ``.coef`` extensions.
    treeOrNot, treeFileName -- accepted but not used by this function.
    """
    algo = getAlgoName(args)
    doubleMarker = '' if len(pDouble) == 0 else 'double'

    # Everything after the algorithm/target prefix is shared by both names.
    trainedOn = os.path.basename(os.path.abspath(args.td))
    runTag = ('-td.' + trainedOn + '-dt.' + args.dt + '-targetClass.' +
              args.targetClass + "-wt." + args.wt +
              attribute.generateExtension() + doubleMarker)

    outputFileName = path + '/' + algo + pTargetVariableKey + runTag + '.model'
    # The .coef name omits the target key and the R code appends to it, so
    # every target written with the same settings lands in the same file.
    modelValueFileName = path + '/' + algo + runTag + '.coef'

    rLines = (
        '\nprint (paste("Section8: Saving the model in file ' +
        outputFileName + '")) \n',
        'save(fit, file = "' + outputFileName + '")\n',
        'l = coef(fit, s = "lambda.min")\n',
        'string_intercept = paste("' + pTargetVariableKey +
        '" , "-intercept-value = ",toString(l[1]),"\\n",sep="")\n',
        'string_intercept = paste(string_intercept,"vector-of-alphas-' +
        pTargetVariableKey + ' = ",sep="")\n',
        'for(i in 2:length(l)){\n',
        '    string_intercept = paste(string_intercept,l[i],",",sep="")\n',
        '}\n',
        'string_intercept = paste(string_intercept,"\\n",sep="")\n',
        'cat(string_intercept,file="' + modelValueFileName +
        '",sep="",append=TRUE)\n',
    )
    for rLine in rLines:
        rScript.write(rLine)
Ejemplo n.º 2
0
Archivo: rCodeGen.py Proyecto: grmte/ml
def ToReadTargetFile(rScript,config):
    """Emit "Section2" of the R script: load every target across all data dirs.

    The generated R code splits its second command-line argument on ';' into
    ``lDirectorySet`` and, for each entry of ``config["target"]``, loads
    ``<dir>/t/<name><extension>.bin`` from every directory, row-binding the
    results into an R variable named after the target key and recording each
    directory's row count in ``lengthOfEachDay``.

    rScript -- object with a ``write`` method receiving the R text.
    config  -- mapping with a "target" section (target key -> target name).
    """
    rScript.write('print ("Section2: Read target files") \n')
    lTargetSet = config["target"]
    rScript.write('lDirectorySet<-strsplit(args[2],";",fixed=TRUE,useBytes=FALSE)\n')
    for target in lTargetSet:
        configuredName = lTargetSet[target]
        # The R object inside the .bin file is referenced without square brackets.
        rObjectName = configuredName.replace('[','').replace(']','')
        fileToRead = configuredName + attribute.generateExtension()

        loadLine = '        load(paste(file,"/t/'+fileToRead+'.bin",sep=""))\n'
        dropLoadedLine = '        rm(' + rObjectName + ')\n'
        rLines = [
            'lengthOfEachDay = numeric()\n',
            'lFlag=FALSE\n',
            'for (file in lDirectorySet[[1]]){\n',
            # First directory: initialise the target variable.
            '    if (!lFlag){\n',
            loadLine,
            '        ' + target+'<- ' + rObjectName + skipRowCode + '\n',
            dropLoadedLine,
            '        lengthOfEachDay = c(lengthOfEachDay,nrow(' + target + '))\n',
            '        lFlag=TRUE\n',
            '    }\n',
            # Later directories: append rows to the existing variable.
            '    else{\n',
            loadLine,
            '        temp<-' + rObjectName + skipRowCode + '\n',
            dropLoadedLine,
            '        lengthOfEachDay = c(lengthOfEachDay,nrow(temp))\n',
            '        '+target+'<-rbind('+target+',temp)\n',
            '        rm(temp)\n',
            '    }\n',
            '    print ("Reading '+ fileToRead +'.target' + '") \n',
            '}\n',
        ]
        for rLine in rLines:
            rScript.write(rLine)
Ejemplo n.º 3
0
 def scriptWrapperForDayWiseCorrelation(pTrainingDay):
     """Run the per-day correlation R script for ``pTrainingDay``.

     The script path is built from the module-level ``l_exp_dir`` and ``args``
     (``td``, ``dt``) plus the base name of ``pTrainingDay``; the script is
     then handed to ``utility.runCommand`` with ``-d pTrainingDay``.
     """
     dayLabel = os.path.basename(os.path.abspath(pTrainingDay))
     trainedOn = os.path.basename(os.path.abspath(args.td))
     scriptPath = (l_exp_dir + "/corr-date-" + dayLabel + "-td." + trainedOn +
                   "-dt." + args.dt + attribute.generateExtension() + ".r")
     command = [scriptPath, '-d', pTrainingDay]
     utility.runCommand(command, args.run, args.sequence)
Ejemplo n.º 4
0
def main():
    """Generate an executable R script that computes feature/target correlations.

    Parses the command line, reads ``<e>/design.ini`` and writes
    ``corr-td.<td>-dt.<dt><extension>.r`` next to the experiment folder. The
    script body is produced by ``rCodeGen``: set-up checks, target loading,
    and one correlation section per entry of ``config['target']``, each
    appending to a shared ``correlation-coef-...coef`` file. The generated
    script is finally marked executable.
    """
    # Local import: only this entry point needs it.
    import subprocess

    parser = argparse.ArgumentParser(
        description=
        'Generates train.r. A sample command is :- src/corrRGenForE.py -e ob/e/nsefut/CorExpHINDALCO/ -td ob/data/ro/nsefut/20141017/ -dt 10 -iT HINDALCO -sP -1 -oT 0'
    )
    parser.add_argument(
        '-e',
        required=True,
        help='Experiement folder to use to find the features and targets')
    parser.add_argument('-td',
                        required=True,
                        help="Day on which it was trained")
    parser.add_argument('-dt',
                        required=True,
                        help="Number of days it was trained")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP',
                        required=False,
                        help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print("Using the experiment folder " + args.e)
    print(args.e + "/design.ini")
    config = ConfigObj(args.e + "/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"
    runTag = ("-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." +
              args.dt + attribute.generateExtension())
    rProgName = "corr" + runTag + ".r"
    # dirName already ends in '/', so these paths contain '//'. That is kept
    # on purpose so the generated names stay identical to earlier runs.
    rProgLocation = dirName + '/' + rProgName
    lCorrelationFileName = dirName + '/correlation-coef' + runTag + ".coef"

    # The context manager closes the script even if a generator step raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        rCodeGen.ForSetUpChecks(rScript)
        rCodeGen.ToReadTargetFile(rScript, config)
        for target in config['target']:
            rCodeGen.ToFindCorrelationAndPrintingToFile(
                rScript, config, target, lCorrelationFileName)

    print("Finished generating R training program: " + rProgLocation)
    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed through verbatim.
    subprocess.call(["chmod", "+x", rProgLocation])
Ejemplo n.º 5
0
def getTargetValuesIntoDict(pTargetValuesDict):
    """Fill ``pTargetValuesDict`` with the buy and sell target values.

    Reads the 'buy' and 'sell' target files named in the "target" section of
    ``<args.e>/design.ini`` from ``<data dir>/t/`` and stores, for every key
    found in the buy file, ``{'buy': ..., 'sell': ...}`` in
    ``pTargetValuesDict``.

    pTargetValuesDict -- dict updated in place; nothing is returned.

    Raises KeyError if a key from the buy file is missing from the sell file.
    """
    # Target files are read from the '/wf/' counterpart of the '/ro/' data path.
    dirName = args.d.replace('/ro/', '/wf/')
    config = ConfigObj(args.e + "/design.ini")
    target = config["target"]
    fileHasHeader = True

    lValuesBySide = {}
    for side, label in (('buy', 'Buy'), ('sell', 'Sell')):
        lFileName = (dirName + "/t/" + target[side] +
                     attribute.generateExtension() + ".target")
        print(label + " Target values file : " + lFileName)
        sys.stdout.flush()

        lSideValues = dict()
        # Close the file as soon as it has been consumed.
        with open(lFileName) as lTargetFile:
            # The reader's return value is only needed by a duplicate-timestamp
            # check that was disabled earlier, so it is intentionally ignored.
            functionToReadTargetFileToDictionary(lTargetFile, lSideValues,
                                                 fileHasHeader)
        print("Finished reading the " + side + " target values file")
        print("The number of elements in the " + side +
              " target values dictionary is : " + str(len(lSideValues)))
        sys.stdout.flush()
        lValuesBySide[side] = lSideValues

    # ----------------- Getting target values into dictionary -----------------
    lBuyValues = lValuesBySide['buy']
    lSellValues = lValuesBySide['sell']
    for elements in lBuyValues:
        pTargetValuesDict[elements] = {
            'buy': lBuyValues[elements],
            'sell': lSellValues[elements],
        }
Ejemplo n.º 6
0
def getPredictCommandList(experimentFolder,algoName,predictFolder,trainFolder,pNumberOfDays,pWtsTaken):
    """Return one ``[script, "-d", dataDir]`` command per matching predict script.

    Scripts are found by globbing ``experimentFolder`` for
    ``predict<algo>-td.<train day>-dt.<days>-pd.<predict day>-wt.<wt><ext>-For*.r``;
    ``dataDir`` is ``predictFolder`` with '/ro/' replaced by '/wf/'.
    """
    trainDay = os.path.basename(os.path.abspath(trainFolder))
    predictDay = os.path.basename(os.path.abspath(predictFolder))
    scriptPattern = (experimentFolder + "/predict" + algoName + "-td." + trainDay
                     + "-dt." + pNumberOfDays + "-pd." + predictDay + "-wt."
                     + pWtsTaken + attribute.generateExtension() + "-For*.r")
    matchingScripts = glob.glob(scriptPattern)
    dataDir = predictFolder.replace('/ro/','/wf/')
    return [[scriptPath,"-d",dataDir] for scriptPath in matchingScripts]
Ejemplo n.º 7
0
def getPredictCommandList(experimentFolder, algoName, predictFolder,
                          trainFolder, pNumberOfDays, pWtsTaken):
    """Build the command lines for every generated predict script.

    Globs ``experimentFolder`` for the predict scripts that match the given
    algorithm, training day, number of days, prediction day and weight type,
    and pairs each with ``-d <predictFolder with '/ro/' -> '/wf/'>``.

    Returns a list of ``[scriptPath, "-d", dataDirectory]`` lists.
    """
    namePieces = [
        experimentFolder,
        "/predict",
        algoName,
        "-td.",
        os.path.basename(os.path.abspath(trainFolder)),
        "-dt.",
        pNumberOfDays,
        "-pd.",
        os.path.basename(os.path.abspath(predictFolder)),
        "-wt.",
        pWtsTaken,
        attribute.generateExtension(),
        "-For*.r",
    ]
    globPattern = "".join(namePieces)

    dataDirectory = predictFolder.replace('/ro/', '/wf/')
    commands = []
    for scriptPath in glob.glob(globPattern):
        commands.append([scriptPath, "-d", dataDirectory])
    return commands
Ejemplo n.º 8
0
def getTrainCommandList(experimentFolder,algoName,trainFolder,pNumberOfDays,pWtsTaken):
    """Return one ``[script, "-d", dirs]`` command per matching train script.

    Scripts are found by globbing ``experimentFolder`` for
    ``train<algo>-td.<train day>-dt.<days>-wt.<wt><ext>-For*.r``. ``dirs`` is
    the ';'-joined (unquoted) list of training directories returned by
    ``attribute.getListOfTrainingDirectoriesNames``.

    NOTE(review): the directory list is built from the module-level
    ``args.dt`` / ``args.iT`` rather than ``pNumberOfDays`` -- confirm callers
    always pass ``args.dt`` as ``pNumberOfDays``.
    """
    trainDay = os.path.basename(os.path.abspath(trainFolder))
    scriptPattern = (experimentFolder+"/train" + algoName + "-td." + trainDay +
                     "-dt." + pNumberOfDays +"-wt." + pWtsTaken +
                     attribute.generateExtension() +"-For*.r")
    matchingScripts = glob.glob(scriptPattern)

    dataDir = trainFolder.replace('/ro/','/wf/')
    joinedTrainingDirs = ";".join(
        attribute.getListOfTrainingDirectoriesNames(args.dt,dataDir,args.iT))
    return [[scriptPath,"-d",joinedTrainingDirs] for scriptPath in matchingScripts]
Ejemplo n.º 9
0
Archivo: rCodeGen.py Proyecto: grmte/ml
def saveTrainingModel(rScript,args,path,pTargetVariableKey,pDouble="", treeOrNot = "", treeFileName = ""):
    """Write the "Section8" R code that saves the model and its coefficients.

    The generated R code saves ``fit`` to a ``.model`` file and appends the
    intercept plus the remaining ``lambda.min`` coefficients, labelled with
    ``pTargetVariableKey``, to a ``.coef`` file. Both names are built from
    ``path``, the algorithm name, ``args.td``/``dt``/``targetClass``/``wt`` and
    the instrument extension; a non-empty ``pDouble`` adds 'double' before the
    file extension. ``treeOrNot`` and ``treeFileName`` are not used here.
    """
    emit = rScript.write
    algo = getAlgoName(args)
    if len(pDouble)==0:
        modelExt, coefExt = '.model', '.coef'
    else:
        modelExt, coefExt = 'double.model', 'double.coef'

    sharedTail = '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + args.dt + \
                 '-targetClass.' + args.targetClass + "-wt." + args.wt + attribute.generateExtension()
    # Only the .model name carries the target key.
    outputFileName = path+'/'+algo+pTargetVariableKey + sharedTail + modelExt
    modelValueFileName = path+'/'+algo + sharedTail + coefExt

    emit('\nprint (paste("Section8: Saving the model in file %s")) \n' % outputFileName)
    emit('save(fit, file = "%s")\n' % outputFileName)
    emit('l = coef(fit, s = "lambda.min")\n')
    emit('string_intercept = paste("%s" , "-intercept-value = ",toString(l[1]),"\\n",sep="")\n' % pTargetVariableKey)
    emit('string_intercept = paste(string_intercept,"vector-of-alphas-%s = ",sep="")\n' % pTargetVariableKey)
    emit('for(i in 2:length(l)){\n')
    emit('    string_intercept = paste(string_intercept,l[i],",",sep="")\n')
    emit('}\n')
    emit('string_intercept = paste(string_intercept,"\\n",sep="")\n')
    emit('cat(string_intercept,file="%s",sep="",append=TRUE)\n' % modelValueFileName)
Ejemplo n.º 10
0
def ToFindCorrelationAndPrintingToFile(rScript, config, pTargetVariableKey,
                                       pFileName):
    """Emit "Section6": correlate every feature with one target variable.

    For each entry of ``config["features-<key>"]`` the generated R code loads
    the feature's ``.bin`` file from every directory in ``lDirectorySet``,
    row-binds the pieces, computes ``cor`` between column 2 of the target and
    column 2 of the feature, appends ``<feature name>=<value>`` to
    ``pFileName`` and removes the feature variable again.

    rScript            -- object with a ``write`` method receiving the R text.
    config             -- mapping holding the "features-<key>" section.
    pTargetVariableKey -- target key; also the name of the R target variable.
    pFileName          -- path of the file the generated R code appends to.
    """
    features = config["features-" + pTargetVariableKey]
    appendToFile = ('cat(string_intercept,file="' + pFileName +
                    '",sep="",append=TRUE)\n')

    rScript.write('\nprint ("Section6: To Find Correlation For ' +
                  pTargetVariableKey + '") \n')
    rScript.write('string_intercept = paste("CorrelationCoeficient Of ","' +
                  pTargetVariableKey + '" , ":- ","\\n",sep="")\n')
    rScript.write(appendToFile)

    for feature in features:
        configuredName = features[feature]
        # R object name: square brackets and parentheses stripped.
        rObjectName = configuredName
        for unwanted in '[]()':
            rObjectName = rObjectName.replace(unwanted, '')
        # File name: only parentheses stripped.
        featureFileName = configuredName.replace('(', '').replace(
            ')', '') + attribute.generateExtension()
        rVariable = feature + pTargetVariableKey

        loadLine = ('        load(paste(file,"/f/' + featureFileName +
                    '.bin",sep=""))\n')
        dropLoadedLine = '        rm("' + rObjectName + '")\n'
        rLines = (
            'lFlag=FALSE\n',
            'for (file in lDirectorySet[[1]]){\n',
            # First directory initialises the variable ...
            '    if (!lFlag){\n',
            loadLine,
            '        ' + rVariable + '<-get("' + rObjectName + '")' +
            skipRowCode + ' \n',
            dropLoadedLine,
            '        lFlag=TRUE\n',
            '    }\n',
            # ... later directories are appended to it.
            '    else {\n',
            loadLine,
            '        temp<-get("' + rObjectName + '")' + skipRowCode + '\n',
            dropLoadedLine,
            '        ' + rVariable + '<-rbind(' + rVariable + ',temp)\n',
            '        rm(temp)\n',
            '    }\n',
            '    print ("Reading ' + featureFileName + '.feature' + '") \n',
            '}\n',
            'tempCor <- cor(' + pTargetVariableKey + '[,2] , ' + rVariable +
            '[,2] )\n',
            # The report line uses the name exactly as configured.
            'string_intercept = paste("' + configuredName +
            '" ,"=",toString(tempCor),"\\n",sep="")\n',
            appendToFile,
            'rm(' + rVariable + ')\n',
        )
        for rLine in rLines:
            rScript.write(rLine)

    rScript.write('string_intercept = paste("\\n","\\n",sep="")\n')
    rScript.write(appendToFile)
Ejemplo n.º 11
0
def getTargetValuesIntoDict(pTargetValuesDict):
    """Populate ``pTargetValuesDict`` with buy/sell target values per key.

    The 'buy' and 'sell' target file names come from the "target" section of
    ``<args.e>/design.ini``; both files are read from ``<data dir>/t/``. For
    every key present in the buy file the entry
    ``{'buy': ..., 'sell': ...}`` is stored in ``pTargetValuesDict`` (updated
    in place, nothing returned).

    Raises KeyError if a key from the buy file is missing from the sell file.
    """
    # Target files are read from the '/wf/' counterpart of the '/ro/' data path.
    dirName = args.d.replace('/ro/','/wf/')
    config = ConfigObj(args.e+"/design.ini")
    target = config["target"]

    def readSide(pSide, pLabel):
        """Read one side's target file into a fresh dict and report on it."""
        lFileName = dirName+"/t/" + target[pSide]+ attribute.generateExtension() + ".target"
        print(pLabel + " Target values file : "+ lFileName)
        sys.stdout.flush()
        lValues = dict()
        fileHasHeader = True
        # 'with' guarantees the handle is released once the file is consumed.
        with open(lFileName) as lFile:
            # The returned line count fed a duplicate-timestamp check that was
            # disabled earlier, so it is intentionally not kept.
            functionToReadTargetFileToDictionary(lFile,lValues,fileHasHeader)
        print("Finished reading the " + pSide + " target values file")
        print("The number of elements in the " + pSide + " target values dictionary is : " + str(len(lValues)))
        sys.stdout.flush()
        return lValues

    lTargetBuyValuesDict = readSide('buy', 'Buy')
    lTargetSellValuesDict = readSide('sell', 'Sell')

    #-----------------Getting target values into dictionary -------------------------------------
    for elements in lTargetBuyValuesDict:
        pTargetValuesDict[elements] = {
            'buy': lTargetBuyValuesDict[elements],
            'sell': lTargetSellValuesDict[elements],
        }
Ejemplo n.º 12
0
def getTrainCommandList(experimentFolder, algoName, trainFolder, pNumberOfDays,
                        pWtsTaken):
    """Build the command lines for every generated train script.

    Globs ``experimentFolder`` for the train scripts matching the algorithm,
    training day, number of days and weight type, and pairs each script with
    ``-d <dirs>``, where ``dirs`` is the ';'-joined, unquoted result of
    ``attribute.getListOfTrainingDirectoriesNames``.

    NOTE(review): that directory list is derived from the module-level
    ``args.dt`` and ``args.iT``, not from ``pNumberOfDays`` -- verify that the
    two always agree at the call sites.

    Returns a list of ``[scriptPath, "-d", dirs]`` lists.
    """
    globPattern = "".join([
        experimentFolder,
        "/train",
        algoName,
        "-td.",
        os.path.basename(os.path.abspath(trainFolder)),
        "-dt.",
        pNumberOfDays,
        "-wt.",
        pWtsTaken,
        attribute.generateExtension(),
        "-For*.r",
    ])
    scriptPaths = glob.glob(globPattern)

    workingDir = trainFolder.replace('/ro/', '/wf/')
    trainingDirs = attribute.getListOfTrainingDirectoriesNames(
        args.dt, workingDir, args.iT)
    dataArgument = ";".join(trainingDirs)

    commands = list()
    for scriptPath in scriptPaths:
        commands.append([scriptPath, "-d", dataArgument])
    return commands
Ejemplo n.º 13
0
Archivo: rCodeGen.py Proyecto: grmte/ml
def ForLoadingModel(rScript,args,path,pTargetVariableKey,config):
    """Emit the R code that builds the feature matrix and loads a saved model.

    For ``args.a == 'glmnet'`` the generated code first binds column 2 of
    every ``<featureKey><targetKey>`` variable into ``X``. In all cases it
    then emits an R ``load`` of the ``.model`` file located in the parent
    directory of ``path``.

    rScript            -- object with a ``write`` method receiving the R text.
    args               -- parsed arguments; ``a``, ``td``, ``dt``,
                          ``targetClass`` and ``wt`` are read.
    path               -- path whose ``os.path.dirname`` holds the model file.
    pTargetVariableKey -- target key used in variable and file names.
    config             -- mapping holding the "features-<key>" section.
    """
    features = config["features-"+pTargetVariableKey]
    if args.a == 'glmnet':
        rScript.write('print ("Section7: Running glmnet") \n')
        # Iterate the section directly rather than indexing features.keys()
        # by position: same order, no key list rebuilt per feature, and it
        # works for mappings whose keys() is not indexable.
        columns = ','.join(feature+pTargetVariableKey+'[,2]' for feature in features)
        rScript.write('X <- cbind(' + columns + ')\n')

    predictionModel = args.a + pTargetVariableKey + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + args.dt + '-targetClass.' + args.targetClass +\
                        "-wt." + args.wt+ attribute.generateExtension()  + '.model'

    rScript.write('load("'+os.path.dirname(path)+'/'+predictionModel+'")\n')
Ejemplo n.º 14
0
def ForLoadingModel(rScript, args, path, pTargetVariableKey, config):
    """Write the R code that prepares ``X`` (glmnet only) and loads the model.

    When ``args.a`` is 'glmnet', the generated code column-binds ``[,2]`` of
    each ``<featureKey><targetKey>`` R variable into ``X``. Regardless of the
    algorithm, an R ``load`` statement is emitted for the ``.model`` file that
    lives in ``os.path.dirname(path)``.

    rScript            -- object with a ``write`` method receiving the R text.
    args               -- parsed arguments (``a``, ``td``, ``dt``,
                          ``targetClass``, ``wt`` are read).
    path               -- path whose parent directory contains the model.
    pTargetVariableKey -- target key used in variable and file names.
    config             -- mapping holding the "features-<key>" section.
    """
    features = config["features-" + pTargetVariableKey]
    if args.a == 'glmnet':
        rScript.write('print ("Section7: Running glmnet") \n')
        # One pass over the feature keys. The earlier positional lookup
        # features.keys()[i] rebuilt the key list for every feature and
        # required keys() to return an indexable list.
        columnExpressions = [
            feature + pTargetVariableKey + '[,2]' for feature in features
        ]
        rScript.write('X <- cbind(' + ','.join(columnExpressions) + ')\n')

    trainedOn = os.path.basename(os.path.abspath(args.td))
    predictionModel = (args.a + pTargetVariableKey + '-td.' + trainedOn +
                       '-dt.' + args.dt + '-targetClass.' + args.targetClass +
                       "-wt." + args.wt + attribute.generateExtension() +
                       '.model')

    rScript.write('load("' + os.path.dirname(path) + '/' + predictionModel +
                  '")\n')
Ejemplo n.º 15
0
def ToReadFeatureFiles(rScript,
                       config,
                       targetVariable,
                       pUseWhichArgumentForData=2):
    """Emit "Section3": load every feature of one target across all data dirs.

    The generated R code splits a command-line argument on ';' into
    ``lDirectorySet`` and, for each entry of ``config["features-<target>"]``,
    loads ``<dir>/f/<name><extension>.bin`` from every directory, row-binding
    the pieces into an R variable named ``<featureKey><targetVariable>``.

    rScript                  -- object with a ``write`` method.
    config                   -- mapping holding the "features-<target>" section.
    targetVariable           -- target key selecting the feature section.
    pUseWhichArgumentForData -- 4 makes the R code read ``args[4]``; any other
                                value makes it read ``args[2]``.
    """
    features = config["features-" + targetVariable]
    rScript.write('\nprint ("Section3: Read feature files") \n')

    argumentIndex = '4' if pUseWhichArgumentForData == 4 else '2'
    rScript.write('lDirectorySet<-strsplit(args[' + argumentIndex +
                  '],";",fixed=TRUE,useBytes=FALSE)\n')

    for feature in features:
        configuredName = features[feature]
        # R object name: square brackets and parentheses stripped.
        rObjectName = configuredName
        for unwanted in ('[', ']', '(', ')'):
            rObjectName = rObjectName.replace(unwanted, '')
        # File name: only parentheses stripped.
        featureFileName = configuredName.replace('(', '').replace(
            ')', '') + attribute.generateExtension()
        rVariable = feature + targetVariable

        loadLine = ('        load(paste(file,"/f/' + featureFileName +
                    '.bin",sep=""))\n')
        dropLoadedLine = '        rm("' + rObjectName + '")\n'
        rLines = [
            'lFlag=FALSE\n',
            'for (file in lDirectorySet[[1]]){\n',
            # First directory initialises the variable ...
            '    if (!lFlag){\n',
            loadLine,
            '        ' + rVariable + '<-get("' + rObjectName + '")' +
            skipRowCode + ' \n',
            dropLoadedLine,
            '        lFlag=TRUE\n',
            '    }\n',
            # ... later directories are appended to it.
            '    else {\n',
            loadLine,
            '        temp<-get("' + rObjectName + '")' + skipRowCode + '\n',
            dropLoadedLine,
            '        ' + rVariable + '<-rbind(' + rVariable + ',temp)\n',
            '        rm(temp)\n',
            '    }\n',
            '    print ("Reading ' + featureFileName + '.feature' + '") \n',
            '}\n',
        ]
        for rLine in rLines:
            rScript.write(rLine)
Ejemplo n.º 16
0
def ToReadTargetFile(rScript, config):
    """Write the "Section2" R code that reads all target files.

    The emitted R code takes the ';'-separated directory list from its second
    argument and, per entry of ``config["target"]``, loads
    ``<dir>/t/<name><extension>.bin`` for each directory. Rows are accumulated
    in an R variable named after the target key, and ``lengthOfEachDay``
    collects the row count contributed by each directory.

    rScript -- object with a ``write`` method receiving the R text.
    config  -- mapping with a "target" section (target key -> target name).
    """
    emit = rScript.write
    emit('print ("Section2: Read target files") \n')
    lTargetSet = config["target"]
    emit('lDirectorySet<-strsplit(args[2],";",fixed=TRUE,useBytes=FALSE)\n')

    for target in lTargetSet:
        configuredName = lTargetSet[target]
        # The object stored in the .bin file is referenced without brackets.
        rObjectName = configuredName
        for bracket in '[]':
            rObjectName = rObjectName.replace(bracket, '')
        fileToRead = configuredName + attribute.generateExtension()

        loadStatement = ('        load(paste(file,"/t/' + fileToRead +
                         '.bin",sep=""))\n')
        removeLoaded = '        rm(' + rObjectName + ')\n'

        emit('lengthOfEachDay = numeric()\n')
        emit('lFlag=FALSE\n')
        emit('for (file in lDirectorySet[[1]]){\n')

        # Branch taken for the first directory only.
        emit('    if (!lFlag){\n')
        emit(loadStatement)
        emit('        ' + target + '<- ' + rObjectName + skipRowCode + '\n')
        emit(removeLoaded)
        emit('        lengthOfEachDay = c(lengthOfEachDay,nrow(' + target +
             '))\n')
        emit('        lFlag=TRUE\n')
        emit('    }\n')

        # Branch taken for every following directory.
        emit('    else{\n')
        emit(loadStatement)
        emit('        temp<-' + rObjectName + skipRowCode + '\n')
        emit(removeLoaded)
        emit('        lengthOfEachDay = c(lengthOfEachDay,nrow(temp))\n')
        emit('        ' + target + '<-rbind(' + target + ',temp)\n')
        emit('        rm(temp)\n')
        emit('    }\n')

        emit('    print ("Reading ' + fileToRead + '.target' + '") \n')
        emit('}\n')
Ejemplo n.º 17
0
def main():
    """Generate the executable R prediction script for one sub-experiment set.

    Parses the command line, reads ``<e>/design.ini`` and writes
    ``predict<algo>-td...-For<sub folder>SubE.r`` into the directory of the
    experiment folder. The script loads the features of every target once and
    then, for each design file found under ``-s``, contains the prediction
    code for every target whose ``.predictions`` file does not exist yet (or
    for all targets when ``-skipP no`` is given). The script is finally
    marked executable.
    """
    # Local import: only this entry point needs it.
    import subprocess

    parser = argparse.ArgumentParser(
        description=
        'Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/'
    )
    parser.add_argument('-e',
                        required=True,
                        help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument(
        '-s',
        required=True,
        help='Location of the subfolder that contains the sub experiments')
    parser.add_argument('-pd', required=True, help='Prediction directory')
    parser.add_argument('-td',
                        required=True,
                        help="Day on which it was trained")
    parser.add_argument('-dt',
                        required=True,
                        help="Number of days it was trained")
    parser.add_argument(
        '-targetClass',
        required=True,
        help=
        "For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)"
    )
    parser.add_argument(
        '-skipP',
        required=False,
        help=
        "yes or no , If you want to regenerate already generated algorithm prediction file then make this value No"
    )
    parser.add_argument(
        '-wt',
        required=False,
        help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP',
                        required=False,
                        help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    args = parser.parse_args()

    if args.wt is None:
        # -wt is declared optional but is concatenated into every file name
        # below; fail with a usage message instead of a TypeError later on.
        parser.error("-wt is required: it is part of the generated file names")

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipP is None:
        args.skipP = "yes"

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e + "/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e)

    # -a is a required option, so no default algorithm is needed.
    algo = args.a

    args.s = args.s + "/"

    predictDataDirectoryName = args.pd.replace('/ro/', '/wf/')
    predictDataDirectoryName = predictDataDirectoryName + "/p/" + os.path.basename(
        os.path.dirname(args.e))
    if not os.path.exists(predictDataDirectoryName):
        os.mkdir(predictDataDirectoryName)

    trainedOn = os.path.basename(os.path.abspath(args.td))
    rProgName = "predict" + algo + "-td." + trainedOn + "-dt." + args.dt + "-pd." + os.path.basename(os.path.abspath(args.pd)) \
                + "-wt." + args.wt + attribute.generateExtension() +"-For"+os.path.basename(os.path.dirname(args.s))+"SubE.r"
    rProgLocation = dirName + '/' + rProgName

    # The context manager closes the script even if a generator step raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if args.a == 'glmnet':
            rScript.write('require (glmnet) \n')
        elif args.a == 'randomForest':
            rScript.write('require (randomForest) \n')

        rCodeGen.ForSetUpChecks(rScript)
        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript, config, target)
            rCodeGen.ForSanityChecks(rScript, config, target)

        designFiles = utility.list_files(args.s)

        for designFile in designFiles:
            print("Generating r code for " + designFile)
            rScript.write('\n\nprint ("Running r code for' + designFile +
                          '")\n')
            # From here on the sub-experiment's own design drives generation.
            config = ConfigObj(designFile)
            for target in config['target']:
                predictionFileName = predictDataDirectoryName + "/" +  args.a + target + '-td.' + trainedOn + '-dt.' + args.dt +\
                 '-targetClass.' + args.targetClass + '-f.' + os.path.basename(os.path.dirname(designFile)) + "-wt." + args.wt+ attribute.generateExtension()  +".predictions"
                regenerate = args.skipP.lower() == "no"
                if regenerate or not os.path.isfile(predictionFileName):
                    rCodeGen.ForPredictions(rScript, config, args, designFile,
                                            target)
                else:
                    print(
                        predictionFileName +
                        "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no "
                    )

        rScript.write('rm(list=ls())')

    print("Finished generating R prediction program: " + rProgLocation)
    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed through verbatim.
    subprocess.call(["chmod", "+x", rProgLocation])
Ejemplo n.º 18
0
Archivo: tradeE10.py Proyecto: grmte/ml
def main():
    """Sweep ladders of entry/exit confidence levels and run the trade pass on each.

    The gap between consecutive levels starts at 0.2 and grows by 0.1 on every
    iteration until it reaches a quarter of the initial entry range.  For each
    gap, four exit levels are stacked above ``totalExitCL[0]`` and four entry
    levels above the lower entry bound (which is lifted to the top exit level
    when it would otherwise fall below it).  When the exit ladder lies entirely
    below the entry ladder and the top entry level does not exceed
    ``totalEntryCL[1]``, an output file name is derived from the levels and
    ``readOnceAndWrite`` is called.
    """
    dataFile.getDataIntoMatrix(args.pd)
    predictedValuesDict = dict()
    getPredictedValuesIntoDict(predictedValuesDict)
    lower_entry = totalEntryCL[0]
    upper_entry = totalEntryCL[1]
    lower_exit = totalExitCL[0]
    l_index_gap = 0.2
    # The bound is computed once, from the entry range before any adjustment.
    l_max_gap = (upper_entry - lower_entry)/(4)
    while l_index_gap < l_max_gap:
        l_gap = float(l_index_gap)
        exitCL2 = lower_exit + l_gap
        exitCL1 = lower_exit + 2*l_gap
        exitCL0 = lower_exit + 3*l_gap
        exitCL = lower_exit + 4*l_gap
        if lower_entry < exitCL:
            # Never let the entry ladder start below the highest exit level.
            lower_entry = exitCL
        entryCL2 = lower_entry + l_gap
        entryCL1 = lower_entry + 2*l_gap
        entryCL0 = lower_entry + 3*l_gap
        entryCL = lower_entry + 4*l_gap
        l_entry_list = [entryCL2,entryCL1,entryCL0 , entryCL]
        l_exit_list = [exitCL2,exitCL1,exitCL0 , exitCL]
        print("Entry Exit list" + str(l_entry_list)+ str(l_exit_list))
        if exitCL < entryCL2 and entryCL <= upper_entry:
            # The levels are floats; they must be converted before being
            # joined into the file name (plain "+" raised TypeError).
            l_levels = "-".join(str(l_level) for l_level in
                                (entryCL, entryCL0, entryCL1, entryCL2,
                                 exitCL, exitCL0, exitCL1, exitCL2))
            lInitialFileName = args.a + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
                           '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + \
                           '-l.' + l_levels + "-tq." + args.orderQty  + "-te.10"
            initialFileName.append(lInitialFileName)
            # NOTE(review): every name collected so far is processed again with
            # the current ladders - confirm that re-running earlier names is intended.
            for lFileName in initialFileName:
                readOnceAndWrite(lFileName, l_entry_list, l_exit_list, predictedValuesDict)
        # Advance on every iteration; previously the gap only moved when the
        # ladder was rejected, so an accepted ladder repeated forever.
        l_index_gap = l_index_gap + 0.1
Ejemplo n.º 19
0
Archivo: tradeE13.py Proyecto: grmte/ml
# Entry and exit confidence levels arrive as ";"-separated command-line strings;
# the two lists are consumed pairwise by index below.
gEntryCLList = args.entryCL.split(";")
gExitCLList = args.exitCL.split(";")

# Module-level counters/flags, all starting at zero.
gStandingAtAskPMinusOneTickInCloseSell = 0
gStandingAtBidPPlusOneTickInCloseBuy = 0

gPipsTaken = int(args.pipTaken)

gOpenBuyFillPrice = 0
gOpenSellFillPrice = 0

# One output file name per (entry level, exit level) pair.
initialFileName = []
for indexOfCL in range(len(gEntryCLList)):
    lInitialFileName = "".join([
        args.a,
        '-td.', os.path.basename(os.path.abspath(args.td)),
        '-dt.', args.dt,
        '-targetClass.', args.targetClass,
        '-f.', experimentName,
        "-wt.", args.wt,
        "-pipTaken.", args.pipTaken,
        attribute.generateExtension(),
        '-l.', gEntryCLList[indexOfCL], "-", gExitCLList[indexOfCL],
        "-tq.", args.orderQty,
        "-te.13",
    ])
    initialFileName.append(lInitialFileName)

# Four independent, initially empty lookup tables (separate dict objects).
g_quantity_adjustment_list_for_sell = dict()
g_quantity_adjustment_list_for_buy = dict()
g_bestqty_list_for_sell = dict()
g_bestqty_list_for_buy = dict()
class ticks_values_to_be_stored(object):
    """Mutable record of the values kept for one tick.

    Every instance starts with empty strings for ``MsgCode`` and
    ``OrderType``, ``0.0`` for the prices ``NewP``/``OldP`` and ``0`` for the
    quantity ``NewQ``.
    """

    def __init__(self):
        # Text fields default to the empty string.
        self.MsgCode, self.OrderType = '', ''
        # Prices are floats, the quantity is an int.
        self.NewP, self.NewQ, self.OldP = 0.0, 0, 0.0
Ejemplo n.º 20
0
def main():
    """Generate the executable R program that trains rpart trees for an experiment.

    Parses the command line, reads ``design1.ini`` and ``design.ini`` from the
    experiment folder, and writes ``traintree-...r`` next to them.  Training
    code is only emitted when at least one target's tree file is missing or
    ``-skipT`` is not "yes".  The generated script is finally made executable.
    """
    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-targetClass',required=True,help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipT',required=False,help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    # NOTE(review): -treeType is optional, yet it is concatenated into the tree
    # file name below, so omitting it raises TypeError - confirm whether it
    # should be required or given a default.
    parser.add_argument('-treeType',required=False,help="Tree read for trade engine")
    parser.add_argument('-tTD',required=False,help="Tree number of days to be used")
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    if args.skipT is None:
        args.skipT = "yes"
    if args.tTD is None:
        args.tTD = args.dt
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e+"/design1.ini")
    # Join with "/" like design1.ini above; without it "-e ob/e1" pointed at
    # the non-existent "ob/e1design.ini".
    configInit = ConfigObj(args.e+"/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    # os.path.dirname() only keeps the last component when args.e ends with
    # "/", which is how the sample command passes the folder.
    dirName=os.path.dirname(args.e)+"/"

    algo = rCodeGen.getAlgoName(args)
    lTrainedOn = os.path.basename(os.path.abspath(args.td))
    lSkipExisting = args.skipT.lower() == "yes"

    def lTreeFileNameFor(pTarget):
        # Location of the saved tree for one target.
        return dirName+"/"+algo+ pTarget+'-td.' + lTrainedOn + "-tTD" + args.tTD + '-dt.' + args.dt + attribute.generateExtension() +".tree" + args.treeType

    rProgName = "traintree" +  "-td." + lTrainedOn + "-tTD" + args.tTD + "-dt." + args.dt + "-wt." + args.wt + attribute.generateExtension() +".r"
    rProgLocation = dirName+'/'+rProgName
    # The with-block closes the script even when code generation fails.
    with open(rProgLocation,'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        rScript.write('require (rpart) \n')

        rCodeGen.ForSetUpChecks(rScript)

        # Training code is needed as soon as one tree may not be skipped.
        lAllFilePresent = all(os.path.isfile(lTreeFileNameFor(target)) and lSkipExisting
                              for target in config['target'])
        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript,config)
            rCodeGen.ForWtVectorGeneration(rScript,args.wt.lower())

            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript,config,target)
                rCodeGen.ToReadPredictionFiles(rScript,config,target,configInit)
                rCodeGen.ForSanityChecks(rScript,config,target)
                lTreeFileName = lTreeFileNameFor(target)
                if os.path.isfile(lTreeFileName) and lSkipExisting:
                    print("Model File " + lTreeFileName + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipT=No")
                else:
                    rCodeGen.ToRenameDataBeforeTraining(rScript,config,target)
                    rCodeGen.ForTrainingTree(rScript,args,config,target, args.treeType)
                    print(lTreeFileName)
                    rCodeGen.saveTrainingTree(rScript,args,dirName,target, lTreeFileName)

    print("Finished generating R training program: " + rProgLocation)
    os.system("chmod +x "+rProgLocation)
Ejemplo n.º 21
0
def getPredictedValuesIntoDict(pPredictedValuesDict):
    """Fill ``pPredictedValuesDict`` with ``timestamp -> predicted value``.

    The predictions file is looked up under the "/wf/" counterpart of
    ``args.pd`` and named after the first target of the experiment's
    ``design.ini``.  The first line is treated as a header; on every other
    line the second comma-separated field is the timestamp and the remainder
    is the predicted value.  A value that is missing or not numeric is stored
    as 0.

    Terminates the process with ``os._exit(-1)`` when two lines share a
    timestamp.
    """
    # Predictions are stored in the "/wf/" tree that mirrors the "/ro/" data.
    dirName = args.pd.replace('/ro/', '/wf/')
    config = ConfigObj(args.e + "/design.ini")
    target = config["target"]
    predictedValuesFileName = dirName+"/p/"+mainExperimentName+"/"+args.a + target.keys()[0] + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
                                 '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName +  "-wt." + args.wt+ attribute.generateExtension() + ".predictions"
    print("Predicted values file : " + predictedValuesFileName)
    sys.stdout.flush()
    numberOfLinesInPredictedValuesFile = 0
    # The with-block guarantees the file is closed once it has been read.
    with open(predictedValuesFileName) as predictedValuesFile:
        next(predictedValuesFile, None)  # skip the header line
        for line in predictedValuesFile:
            splitLine = line.rstrip('\n').split(',', 2)
            timeStamp = float(splitLine[1])
            try:  #TODO: remove this and then run the code to identify errors.
                predictedProb = float(splitLine[2])
            except (ValueError, IndexError):
                # Only a missing or non-numeric value falls back to 0.
                predictedProb = 0
            pPredictedValuesDict[timeStamp] = predictedProb
            numberOfLinesInPredictedValuesFile += 1
    print("Finished reading the predicted values file")
    print("The number of elements in the predicted values dictionary is : " +
          str(len(pPredictedValuesDict)))
    if numberOfLinesInPredictedValuesFile != len(pPredictedValuesDict):
        # Fewer keys than lines means a timestamp occurred more than once.
        print("Number of duplicate timestamps rejected = " +
              str(numberOfLinesInPredictedValuesFile -
                  len(pPredictedValuesDict)))
        os._exit(-1)
    sys.stdout.flush()
Ejemplo n.º 22
0
def ForPredictions(rScript,
                   config,
                   args,
                   pathToDesignFile,
                   pTargetVariableKey,
                   pUseWhichArgumentForData=2,
                   pDouble=""):
    """Write the R statements that load a saved model, predict and store the output.

    rScript                  -- writable handle of the R program being generated
    config                   -- design configuration; "features-<target>" is read
    args                     -- parsed command line (a, td, dt, targetClass, wt, e)
    pathToDesignFile         -- design file; the model is loaded from its directory
    pTargetVariableKey       -- target whose features and model are used
    pUseWhichArgumentForData -- 4 makes the R code take the output directory from
                                args[4]; every other value uses args[2]
    pDouble                  -- a non-empty value selects the "double" model and
                                predictions file names

    The process is terminated through os._exit(-1) when args.a is not one of
    glmnet, logitr, randomForest or mda.
    """
    emit = rScript.write
    features = config["features-" + pTargetVariableKey]
    algo = getAlgoName(args)
    doubleTag = '' if len(pDouble) == 0 else 'double'
    trainedOn = os.path.basename(os.path.abspath(args.td))

    def emitColumns(withNames):
        # One "<feature><target>[,2]" column per feature, comma separated;
        # data frames get "<name without brackets>=" in front of each column.
        for position, feature in enumerate(features):
            if position:
                emit(',')
            column = feature + pTargetVariableKey + '[,2]'
            if withNames:
                label = features[feature]
                for bracket in '[]()':
                    label = label.replace(bracket, '')
                column = label + '=' + column
            emit(column)

    modelPath = (os.path.dirname(pathToDesignFile) + '/' + algo +
                 pTargetVariableKey + '-td.' + trainedOn + '-dt.' + args.dt +
                 '-targetClass.' + args.targetClass + "-wt." + args.wt +
                 attribute.generateExtension() + doubleTag + '.model')
    emit('\nprint ("Section6: Read in prediction model' + modelPath + '") \n')
    emit('load("' + modelPath + '")')

    if args.a == 'glmnet':
        # glmnet predicts from a matrix, so the columns are bound without names.
        emit('\n\nprint ("Section7: Creating data frame") \n')
        emit('df = cbind(')
        emitColumns(False)
        emit(")\n\n")
        emit('print ("Section8: Running ' + args.a + ' prediction") \n')
        emit('Prob <- predict (fit, newx = df,s = "lambda.min",type = "response")')
        emit("\n\n")
    else:
        # The remaining algorithms share the data-frame code and differ only in
        # the name shown in Section8 and in the predict call.
        if args.a == 'logitr':
            shownName = args.a
            predictCall = 'Prob<- predict (fit, newdata = df, type = "response")'
        elif args.a == 'randomForest':
            shownName = args.a
            predictCall = 'Prob<- predict (fit, df)'
        elif args.a == 'mda':
            shownName = algo
            predictCall = 'Prob<- predict (fit, df)'
        else:
            print("The only valid options are glmnet, logitr, randomForest or mda")
            os._exit(-1)
        emit('\n\nprint ("Section7: Creating the data frame") \n')
        emit('df = data.frame(')
        emitColumns(True)
        emit(")\n\n")
        emit('print ("Section8: Running ' + shownName + ' prediction") \n')
        emit(predictCall)
        emit("\n\n")

    emit('\nprint ("Section9: Creating the data frame to write in the file") \n')
    emit('dfForFile <- data.frame(' + features.keys()[0] + pTargetVariableKey +
         '[,1]) \n')

    emit('\nprint ("Section10: Putting the probabilities in the data frame") \n')
    emit('dfForFile <- cbind(dfForFile,Prob) \n')

    # Path of the predictions file relative to "<data dir>/p/".
    predictionsFile = (os.path.basename(os.path.dirname(args.e)) + '/' +
                       args.a + pTargetVariableKey + '-td.' + trainedOn +
                       '-dt.' + args.dt + '-targetClass.' + args.targetClass +
                       '-f.' +
                       os.path.basename(os.path.dirname(pathToDesignFile)) +
                       "-wt." + args.wt + attribute.generateExtension() +
                       doubleTag + '.predictions')
    emit('\nprint ("Section11: Saving the predictions in file /p/' +
         predictionsFile + '") \n')

    argumentIndex = '4' if pUseWhichArgumentForData == 4 else '2'
    emit('fileName = paste(args[' + argumentIndex + '],"/p/","' +
         predictionsFile + '",sep="") \n')

    emit('print (fileName) \n')
    emit('write.table(format(dfForFile,digits=16), file = fileName,sep=",",quote=FALSE)\n')
Ejemplo n.º 23
0
 def scriptWrapperForPredictProgramRun(predictionDirAfterLastTD):
     """Run the generated prediction R script for one prediction directory.

     The script is looked up in the experiment folder under the name built
     from the algorithm, training day, number of training days, prediction
     directory and weight type; it is handed the "/wf/" counterpart of
     ``predictionDirAfterLastTD`` through ``-d``.
     """
     nameParts = [
         lExperimentFolderName, "/predict", args.a,
         "-td.", os.path.basename(os.path.abspath(args.td)),
         "-dt.", args.dt,
         "-pd.", os.path.basename(os.path.abspath(predictionDirAfterLastTD)),
         "-wt.", args.wt,
         attribute.generateExtension(),
         ".r",
     ]
     workingDir = predictionDirAfterLastTD.replace('/ro/','/wf/')
     utility.runCommand(["".join(nameParts), "-d", workingDir],args.run,args.sequence)
Ejemplo n.º 24
0
Archivo: rCodeGen.py Proyecto: grmte/ml
def ForPredictions(rScript,config,args,pathToDesignFile,pTargetVariableKey,pUseWhichArgumentForData=2,pDouble=""):
    """Emit the R code that loads a trained model, runs it and saves the predictions.

    The feature list comes from config["features-<pTargetVariableKey>"] and the
    model file is expected in the directory of pathToDesignFile.  A non-empty
    pDouble switches to the "double" model and predictions file names;
    pUseWhichArgumentForData == 4 makes the generated R code take its output
    directory from args[4] instead of args[2].

    Exits the process through os._exit(-1) when args.a is not glmnet, logitr,
    randomForest or mda.
    """
    out = rScript.write
    features = config["features-"+pTargetVariableKey]
    algo = getAlgoName(args)
    suffixTag = '' if len(pDouble)==0 else 'double'
    trainingDay = os.path.basename(os.path.abspath(args.td))

    def writeColumns(named):
        # Comma-separated "<feature><target>[,2]" columns; when named, each is
        # prefixed with the feature's display name stripped of "[", "]", "(", ")".
        for index, feature in enumerate(features):
            if index:
                out(',')
            column = feature+pTargetVariableKey+'[,2]'
            if named:
                displayName = features[feature]
                for bracket in '[]()':
                    displayName = displayName.replace(bracket,'')
                column = displayName+'='+column
            out(column)

    modelLocation = os.path.dirname(pathToDesignFile)+'/'+algo+pTargetVariableKey+'-td.'+trainingDay+'-dt.'+args.dt+ \
                    '-targetClass.'+args.targetClass+"-wt."+args.wt+attribute.generateExtension()+suffixTag+'.model'
    out('\nprint ("Section6: Read in prediction model'+modelLocation+'") \n')
    out('load("'+modelLocation+'")')

    if args.a == 'glmnet':
        # glmnet takes a matrix of unnamed columns.
        out('\n\nprint ("Section7: Creating data frame") \n')
        out('df = cbind(')
        writeColumns(False)
        out(")\n\n")
        out('print ("Section8: Running ' + args.a + ' prediction") \n')
        out('Prob <- predict (fit, newx = df,s = "lambda.min",type = "response")')
        out("\n\n")
    else:
        # All other algorithms predict from a named data frame; only the name
        # printed in Section8 and the predict call vary.
        if args.a == 'logitr':
            printedName = args.a
            predictStatement = 'Prob<- predict (fit, newdata = df, type = "response")'
        elif args.a == 'randomForest':
            printedName = args.a
            predictStatement = 'Prob<- predict (fit, df)'
        elif args.a == 'mda':
            printedName = algo
            predictStatement = 'Prob<- predict (fit, df)'
        else:
            print("The only valid options are glmnet, logitr, randomForest or mda")
            os._exit(-1)
        out('\n\nprint ("Section7: Creating the data frame") \n')
        out('df = data.frame(')
        writeColumns(True)
        out(")\n\n")
        out('print ("Section8: Running ' + printedName + ' prediction") \n')
        out(predictStatement)
        out("\n\n")

    out('\nprint ("Section9: Creating the data frame to write in the file") \n')
    out('dfForFile <- data.frame('+features.keys()[0]+pTargetVariableKey+'[,1]) \n')

    out('\nprint ("Section10: Putting the probabilities in the data frame") \n')
    out('dfForFile <- cbind(dfForFile,Prob) \n')

    # Predictions file, relative to "<data dir>/p/".
    predictionsName = os.path.basename(os.path.dirname(args.e))+'/'+args.a+pTargetVariableKey+'-td.'+trainingDay+ \
                      '-dt.'+args.dt+'-targetClass.'+args.targetClass+'-f.'+os.path.basename(os.path.dirname(pathToDesignFile))+ \
                      "-wt."+args.wt+attribute.generateExtension()+suffixTag+'.predictions'
    out('\nprint ("Section11: Saving the predictions in file /p/'+predictionsName+'") \n')

    whichArgument = '4' if pUseWhichArgumentForData == 4 else '2'
    out('fileName = paste(args['+whichArgument+'],"/p/","'+predictionsName+'",sep="") \n')

    out('print (fileName) \n')
    out('write.table(format(dfForFile,digits=16), file = fileName,sep=",",quote=FALSE)\n')
Ejemplo n.º 25
0
def main():
    """Generate the executable R program that trains rpart trees for an experiment.

    Parses the command line, reads ``design1.ini`` and ``design.ini`` from the
    experiment folder, and writes ``traintree-...r`` next to them.  Training
    code is only emitted when at least one target's tree file is missing or
    ``-skipT`` is not "yes".  The generated script is finally made executable.
    """
    parser = argparse.ArgumentParser(
        description=
        'Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument(
        '-e',
        required=True,
        help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument(
        '-targetClass',
        required=True,
        help=
        "binomial(target takes only true and false) / multinomial (target values takes more than 2 values)"
    )
    parser.add_argument(
        '-skipT',
        required=False,
        help=
        "yes or no , If you want to regenerate already generated algorithm model file then make this value No"
    )
    parser.add_argument('-td',
                        required=True,
                        help="Day on which it was trained")
    parser.add_argument('-dt',
                        required=True,
                        help="Number of days it was trained")
    parser.add_argument(
        '-wt',
        required=True,
        help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP',
                        required=False,
                        help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    # NOTE(review): -treeType is optional, yet it is concatenated into the tree
    # file name below, so omitting it raises TypeError - confirm whether it
    # should be required or given a default.
    parser.add_argument('-treeType',
                        required=False,
                        help="Tree read for trade engine")
    parser.add_argument('-tTD',
                        required=False,
                        help="Tree number of days to be used")
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipT is None:
        args.skipT = "yes"
    if args.tTD is None:
        args.tTD = args.dt
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e + "/design1.ini")
    # Join with "/" like design1.ini above; without it "-e ob/e1" pointed at
    # the non-existent "ob/e1design.ini".
    configInit = ConfigObj(args.e + "/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    # os.path.dirname() only keeps the last component when args.e ends with
    # "/", which is how the sample command passes the folder.
    dirName = os.path.dirname(args.e) + "/"

    algo = rCodeGen.getAlgoName(args)
    trained_on = os.path.basename(os.path.abspath(args.td))
    skip_existing = args.skipT.lower() == "yes"

    def tree_file_name_for(target_key):
        # Location of the saved tree for one target.
        return (dirName + "/" + algo + target_key + '-td.' + trained_on +
                "-tTD" + args.tTD + '-dt.' + args.dt +
                attribute.generateExtension() + ".tree" + args.treeType)

    rProgName = ("traintree" + "-td." + trained_on + "-tTD" + args.tTD +
                 "-dt." + args.dt + "-wt." + args.wt +
                 attribute.generateExtension() + ".r")
    rProgLocation = dirName + '/' + rProgName
    # The with-block closes the script even when code generation fails.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        rScript.write('require (rpart) \n')

        rCodeGen.ForSetUpChecks(rScript)

        # Training code is needed as soon as one tree may not be skipped.
        lAllFilePresent = all(
            os.path.isfile(tree_file_name_for(target)) and skip_existing
            for target in config['target'])
        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript, config)
            rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())

            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ToReadPredictionFiles(rScript, config, target,
                                               configInit)
                rCodeGen.ForSanityChecks(rScript, config, target)
                lTreeFileName = tree_file_name_for(target)
                if os.path.isfile(lTreeFileName) and skip_existing:
                    print("Model File " + lTreeFileName + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipT=No")
                else:
                    rCodeGen.ToRenameDataBeforeTraining(rScript, config,
                                                        target)
                    rCodeGen.ForTrainingTree(rScript, args, config, target,
                                             args.treeType)
                    print(lTreeFileName)
                    rCodeGen.saveTrainingTree(rScript, args, dirName, target,
                                              lTreeFileName)

    print("Finished generating R training program: " + rProgLocation)
    os.system("chmod +x " + rProgLocation)
Ejemplo n.º 26
0
    pLPerLotShort=(averageOpenSellPrice - averageCloseBuyPrice)* 1000
    pLPerLotLong=(averageCloseSellPrice - averageOpenBuyPrice)* 1000
    print("1 lot has 1000 qty's so P/L Short per lot is: " + str(pLPerLotShort), file = outputFile)
    print("1 lot has 1000 qty's so P/L Long per lot is: " + str(pLPerLotLong), file = outputFile)
    print("P/L for Short trading 10 lots is: " + str(pLPerLotShort * 10), file = outputFile)
    print("P/L for Long trading 10 lots is: " + str(pLPerLotLong * 10), file = outputFile)


if __name__ == "__main__":
    tStart = datetime.now()
    # Directory derived from the prediction data path with "/ro/" swapped for "/rs/".
    dirName = args.pd.replace('/ro/','/rs/')
    checkAllFilesAreExistOrNot = 'false'
    
    # The buy and sell prediction files live under the "/wf/" counterpart of
    # the prediction data path.
    lWFDirName = args.pd.replace('/ro/','/wf/')
    predictedBuyValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'buy' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + \
    args.dt + '-targetClass.' + args.targetClass + '-f.' + buyExperimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions"
    
    predictedSellValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'sell' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' +\
    args.dt + '-targetClass.' + args.targetClass + '-f.' + sellExperimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions"

    # Entry and exit levels are ";"-separated and must come in equal numbers.
    lEntryClList = args.entryCL.split(";")
    lExitClList = args.exitCL.split(";")
    if len(lEntryClList)!= len(lExitClList):
        # This branch reports a mismatch, so the message has to say "does not match".
        print("Len of entry and exit list does not match. Entry List length = " , len(lEntryClList) , " and ExitCL List length = " , len(lExitClList))
        os._exit(-1)
    lengthOfList = len(lEntryClList)
    
    lMinOfExitCl = 9999.000
    fileNameList = []
    finalEntryClList = []
    finalExitClList = []
Ejemplo n.º 27
0
parser.add_argument('-sequence', required=True,help='lp / dp / serial')
parser.add_argument('-dt',required=False,help='No of day from start for which it is to be trained ')
parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
parser.add_argument('-iT',required=False,help='Instrument name')
parser.add_argument('-sP',required=False,help='Strike price of instrument')
parser.add_argument('-oT',required=False,help='Options Type')
args = parser.parse_args()

attribute.initializeInstDetails(args.iT,args.sP,args.oT)
# Fall back to glmnet when no algorithm name was supplied.
algo = args.a if args.a is not None else 'glmnet'

# -dt is optional; it stays a string because it is concatenated into file names below.
if args.dt is None:
    args.dt = "1"

# ---- Training step: run the generated train<algo>...r script on the training directories ----
dirName = args.td.replace('/ro/','/wf/')
scriptName = args.e+"/train" + algo + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt + "-wt." + args.wt + attribute.generateExtension() +".r"
trainingDataList = attribute.getListOfTrainingDirectoriesNames(args.dt,dirName,args.iT)
# The script receives every training directory as one ';'-joined "-d" argument.
trainingDataListString = ";".join(trainingDataList)
utility.runCommand([scriptName,"-d",trainingDataListString],args.run,args.sequence)

# ---- Prediction step: run the generated predict<algo>...r script on the prediction directory ----
dirName = args.pd.replace('/ro/','/wf/')
scriptName=args.e+"/predict" + algo + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt +\
             "-pd."  + os.path.basename(os.path.abspath(args.pd)) + "-wt." + args.wt + attribute.generateExtension() +".r"
utility.runCommand([scriptName,"-d",dirName],args.run,args.sequence)



Ejemplo n.º 28
0
    else:
        mainExperimentName = pathAfterE

    mainExperimentNameList.append(mainExperimentName)
    experimentName = os.path.basename(absPathOfExperimentName)
    sys.path.append("./src/")
    sys.path.append("./ob/generators/")

    config = ConfigObj(experiment + "/design.ini")
    featureTargetFilePath = args.pd.replace('ro', 'wf')

    for feature in config["features-buy"]:
        lName = config["features-buy"][feature].replace('(',
                                                        '').replace(')', '')
        if lName not in featureNames:
            lFeatureFile = featureTargetFilePath + "/f/" + lName + attribute.generateExtension(
            ) + ".feature"
            featureFP = open(lFeatureFile, "rb")
            featureFpList.append(featureFP)
            featureNames.append(lName)

    for feature in config["features-sell"]:
        lName = config["features-sell"][feature].replace('(',
                                                         '').replace(')', '')
        if lName not in featureNames:
            lFeatureFile = featureTargetFilePath + "/f/" + lName + attribute.generateExtension(
            ) + ".feature"
            featureFP = open(lFeatureFile, "rb")
            featureFpList.append(featureFP)
            featureNames.append(lName)

    dirName = args.pd.replace('/ro/', '/wf/')
Ejemplo n.º 29
0
    required=True,
    help="default/exp , weight type to be given to different days")
parser.add_argument('-iT', required=False, help='Instrument name')
parser.add_argument('-sP', required=False, help='Strike price of instrument')
parser.add_argument('-oT', required=False, help='Options Type')
args = parser.parse_args()

attribute.initializeInstDetails(args.iT, args.sP, args.oT)
# Fall back to glmnet when no algorithm name was supplied.
algo = args.a if args.a is not None else 'glmnet'

# -dt is optional; it stays a string because it is concatenated into file names below.
if args.dt is None:
    args.dt = "1"

# ---- Training step: run the generated train<algo>...r script on the training directories ----
dirName = args.td.replace('/ro/', '/wf/')
scriptName = args.e + "/train" + algo + "-td." + os.path.basename(
    os.path.abspath(args.td)
) + "-dt." + args.dt + "-wt." + args.wt + attribute.generateExtension() + ".r"
trainingDataList = attribute.getListOfTrainingDirectoriesNames(
    args.dt, dirName, args.iT)
# The script receives every training directory as one ';'-joined "-d" argument.
trainingDataListString = ";".join(trainingDataList)
utility.runCommand([scriptName, "-d", trainingDataListString], args.run,
                   args.sequence)

# ---- Prediction step: run the generated predict<algo>...r script on the prediction directory ----
dirName = args.pd.replace('/ro/', '/wf/')
scriptName=args.e+"/predict" + algo + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt +\
             "-pd."  + os.path.basename(os.path.abspath(args.pd)) + "-wt." + args.wt + attribute.generateExtension() +".r"
utility.runCommand([scriptName, "-d", dirName], args.run, args.sequence)
Ejemplo n.º 30
0
# Load the buy and sell trees and derive the trading conditions from them.
# With no -nodes argument the whole tree is traversed and stored under key "0";
# otherwise only the ';'-separated nodes listed in -nodes are traversed and
# stored under key 'nodes'.
if len(args.nodes) == 0:
    for target in ['buy', 'sell']:
        # NOTE(review): the tree file path is hard-coded to one developer's home
        # directory and one training date; the trailing comment holds what looks
        # like the intended argument-driven expression -- confirm before reuse.
        lTreeFileName = "/home/vikas/ml/ob/e/nsecur/ABAll_AmBRAmBAll/s/2c/AmBRAmB//glmnet" + target + "-td.20140821-tTD30-dt.10.tree1"  #args.e+"/"+args.a+ target+'-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + args.dt + attribute.generateExtension() +".tree" + args.treeType
        dd.gGlobalTree[target], lVariable = reading_tree.reading_tree(
            lTreeFileName, args.treeType)
        # Accumulate the variables reported by reading_tree for both targets.
        dd.gTreeVariablesPresent = dd.gTreeVariablesPresent + lVariable
        # Start from an empty condition string and let traverse_tree build it.
        dd.gFinalCondition[target]["0"] = ''
        dd.gFinalCondition[target]["0"] = reading_tree.traverse_tree(
            1, args.treeType, 0.0, dd.gGlobalTree[target],
            dd.gFinalCondition[target]["0"])
        print("Calling tree traversal ")  #,dd.gFinalCondition[target]["0"])
else:
    for target in ['buy', 'sell']:
        # Here the tree file name is built from the command-line arguments.
        lTreeFileName = args.e + "/" + args.a + target + '-td.' + os.path.basename(
            os.path.abspath(
                args.td)) + '-dt.' + args.dt + attribute.generateExtension(
                ) + ".tree" + args.treeType
        dd.gGlobalTree[target], lVariable = reading_tree.reading_tree(
            lTreeFileName, args.treeType)
        dd.gTreeVariablesPresent = dd.gTreeVariablesPresent + lVariable
        nodes = args.nodes.split(";")
        dd.gFinalCondition[target]['nodes'] = reading_tree.traverse_nodes(
            args.treeType, nodes, dd.gGlobalTree[target])
config = ConfigObj(args.e + "/design1.ini")
lListOfPredictionDirectory = attribute.getListOfTrainingDirectoriesNames(
    int(args.nPD), args.pd, args.iT)
lBuyOutputFileObject = args.e + "/Buy-OutOfSampleTree-" + '-pd.' + os.path.basename(
    os.path.abspath(
        args.pd)) + '-nPD.' + args.nPD + attribute.generateExtension(
        ) + ".tree" + args.treeType
lSellOutputFileObject = args.e + "/Sell-OutOfSampleTree-" + '-pd.' + os.path.basename(
Ejemplo n.º 31
0
Archivo: tradeE3.py Proyecto: grmte/ml
# Take the part of the experiment path that follows the first recognised
# market segment (checked in the order nsecur, nsefut, nseopt).
for lMarketName in ('nsecur', 'nsefut', 'nseopt'):
    if lMarketName in absPathOfExperimentName:
        lMarker = "/" + lMarketName + "/"
        lCutAt = absPathOfExperimentName.index(lMarker) + len(lMarker)
        pathAfterE = absPathOfExperimentName[lCutAt:]
        break

# The main experiment name is the first path component after the market segment.
mainExperimentName = pathAfterE.split("/", 1)[0]

experimentName = os.path.basename(absPathOfExperimentName)
# Common prefix for this run's output file names.
initialFileName = "".join([
    args.a,
    '-td.', os.path.basename(os.path.abspath(args.td)),
    '-dt.', args.dt,
    '-targetClass.', args.targetClass,
    '-f.', experimentName,
    "-wt.", args.wt,
    attribute.generateExtension(),
    '-l.', args.entryCL, "-", args.exitCL,
    "-te3",
])

gTickSize = args.tickSize
def getPredictedValuesIntoDict(pPredictedValuesDict):
    # The following will take care if args.e = "ob/e1/" or args.e = "ob/e1"
    dirName = args.pd.replace('/ro/','/wf/')
    config = ConfigObj(args.e+"/design.ini")
    target = config["target"]
    predictedValuesFileName = dirName+"/p/"+mainExperimentName+"/"+args.a + target.keys()[0] + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
                                 '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName +  "-wt." + args.wt+ attribute.generateExtension() + ".predictions"
    print("Predicted values file : "+ predictedValuesFileName)
    sys.stdout.flush()
    predictedValuesFile = open(predictedValuesFileName)
    fileHasHeader = True
    numberOfLinesInPredictedValuesFile = 0
Ejemplo n.º 32
0
                lTreeTrainingList = []
                for i in range(len(allDataDirectories)-int(args.dt)):
                    args.td = allDataDirectories[i]
                    predictionDirLastTD = allDataDirectories[i + int(args.dt) - 1]
                    predictionDirAfterLastTD = allDataDirectories[i + int(args.dt)]

                    lRCodeGenCommandList.append(["mRGenForE.py","-e",lExperimentFolderName,"-a",algo,"-targetClass",args.targetClass,"-skipM",args.skipM,"-td",args.td, "-dt" , \
                                                 args.dt , '-wt' , wt,"-iT",args.iT,"-oT",args.oT,"-sP",args.sP ,'-double', args.double])
#                     lRCodeGenCommandList.append(["pRGenForE.py","-e",args.e,"-s",lExperimentFolderName,"-a",algo,"-skipP",args.skipP,"-td",args.td , "-pd" , predictionDirLastTD , \
#                                                  "-dt" , args.dt , "-targetClass" , args.targetClass , '-wt' , wt,"-iT",args.iT,"-oT",args.oT,"-sP",args.sP,'-double', args.double])
                    lRCodeGenCommandList.append(["pRGenForE.py","-e",args.e,"-s",lExperimentFolderName,"-a",algo,"-skipP",args.skipP,"-td",args.td , "-pd" , predictionDirAfterLastTD ,\
                                                  "-dt" , args.dt , "-targetClass" , args.targetClass , '-wt' , wt,"-iT",args.iT,"-oT",args.oT,"-sP",args.sP,'-double', args.double])

                    dirName = args.td.replace('/ro/','/wf/')
                    if args.double:
                        scriptName = lExperimentFolderName+"/train" + algo + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt + "-wt." + wt + attribute.generateExtension() +"double.r"
                    else:
                        scriptName = lExperimentFolderName+"/train" + algo + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt + "-wt." + wt + attribute.generateExtension() +".r"
                    
                    trainingDataCorrespondingDateList = []    
                    trainingDataList = [] #";".join(allDataDirectories[i:i+ int(args.dt) ])
                    lCount = i
                    treeDataList = []
                    for trainDirs in allDataDirectories[i:i+ int(args.dt)]:
                        trainingDataList.append(trainDirs.replace('/ro/','/wf/'))  
                        
                    
                    try:
                        allDataDirectories[i+ int(args.tTD)]

                        for treeDirs in allDataDirectories[i:i+ int(args.tTD)]:
Ejemplo n.º 33
0
    #results = map(scriptWrapperForFeatureGeneration,allDataDirectories) 
    pass

# ---- Hand the correlation R-code generator script to utility.runCommand ----
# (presumably this produces the per-day corr-date-*.r scripts run below -- confirm)
utility.runCommand(["corrRGenForEForAllDays.py","-e",l_exp_dir,"-td",args.td,"-dt",args.dt,"-iT",args.iT,"-oT",args.oT,"-sP",args.sP],args.run,args.sequence)
if args.sequence=="dp": 
    print dp.printGroupStatus()

# ---- Run one correlation R program per working-file directory ----
# The directory list is derived from the /wf/ counterpart of the training directory.
allWorkingFileDirectories =  attribute.getListOfTrainingDirectoriesNames( int(args.nDays) , args.td.replace('/ro/','/wf/') ,args.iT)
allWorkingFileDirectoriesString = ";".join(allWorkingFileDirectories)
lCorrCommandList = []
if args.sequence == "dp":
    # "dp" mode: collect one [script, '-d', directory] command per day and
    # submit the whole list in a single call.
    for l_training_day in allWorkingFileDirectories:
        lDate = os.path.basename(os.path.abspath(l_training_day))
        lFileName = l_exp_dir + "/corr-date-" + lDate + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt + attribute.generateExtension() +".r"
        lCorrCommandList.append([lFileName,'-d',l_training_day])
    utility.runListOfCommandsWithMaxUtlilizationOfWorkers(lCorrCommandList,args,"Day-wise Correlation",int(args.nComputers))
else:
    # Any other mode: run each day's script through utility.runCommand, one call per directory.
    def scriptWrapperForDayWiseCorrelation(pTrainingDay):
        # Builds the same script name as the "dp" branch above and runs it for one directory.
        lDate = os.path.basename(os.path.abspath(pTrainingDay))
        lFileName = l_exp_dir + "/corr-date-" + lDate + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt + attribute.generateExtension() +".r"
        utility.runCommand([lFileName,'-d',pTrainingDay],args.run,args.sequence)
    # NOTE(review): this relies on map() being eager (Python 2); under Python 3
    # map() is lazy and the wrapper would never be called -- confirm interpreter.
    results = map(scriptWrapperForDayWiseCorrelation,allWorkingFileDirectories)
    
# Summary file opened for writing; the code that fills and closes it is outside this excerpt.
summary_file_name = l_exp_dir + '/correlation-coef' + '-td.' + os.path.basename(os.path.abspath(args.td))+ '-dt.' + args.dt + attribute.generateExtension() + ".coef" 
wfo = open(summary_file_name, 'w')

# Accumulators initialised here and populated further down the script.
lBuyDict = {}
lSellDict = {}
lDayWiseBuy = {}
Ejemplo n.º 34
0
def main():
    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-s', required=True,help='Location of the folder containing all the sub experiments')
    parser.add_argument('-targetClass',required=True,help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM',required=False,help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-skipP',required=False,help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-pd', required=True,help='Prediction directory')
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    if args.skipM == None:
        args.skipM = "yes"
    if args.skipP == None:
        args.skipP = "yes"
                
    print "Using the experiment folder " + args.e
    
    print "Training files steps"
    config = ConfigObj(args.e+"/design.ini")

    print "The config parameters that I am working with are"
    print config 

    predictionDataDirectoryName = args.pd.replace('/ro/','/wf/')
    predictionDataDirectoryName = predictionDataDirectoryName + "/p/" + os.path.basename(os.path.dirname(args.e))
    if not os.path.exists(predictionDataDirectoryName):
        os.mkdir(predictionDataDirectoryName)
        
    dirName=os.path.dirname(args.e)

    algo = rCodeGen.getAlgoName(args)

    args.s = args.s + "/"
    rProgName = "trainPredict"+ algo + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt +\
     "-pd." + os.path.basename(os.path.abspath(args.pd)) +  "-wt." + args.wt+ attribute.generateExtension()  + "-For"+os.path.basename(os.path.dirname(args.s))+"SubE.r"
    rProgLocation = dirName+'/'+rProgName
    rScript = open(rProgLocation,'w')

    rScript.write('#!/usr/bin/Rscript \n')

    if(algo == 'glmnet'):
        rScript.write('require (glmnet) \n')
    elif(algo == 'randomForest'):
        rScript.write('require (randomForest) \n')
    elif(algo == 'mda'):
        rScript.write('require (mda) \n')
    
    rCodeGen.ForSetUpChecksForTrainPredictTogather(rScript)
    rCodeGen.ToReadTargetFile(rScript,config)
    rCodeGen.ForWtVectorGeneration(rScript,args.wt.lower())
    for target in config['target']:
        rCodeGen.ToReadFeatureFiles(rScript,config,target,2)
        rCodeGen.ForSanityChecks(rScript,config,target)

    print "For prediction data set"
    configForPredictions = ConfigObj(args.e+"/design.ini")
    print "The config parameters that I am working with are"
    for target in configForPredictions['target']:
        feature_keys = configForPredictions['features-'+target].keys()
        features = configForPredictions['features-'+target]
        for key in feature_keys:
            new_key = key + "P"
            features[new_key] = features[key]
            del features[key]
        print configForPredictions 

        rCodeGen.ToReadFeatureFiles(rScript,configForPredictions,target,4)
        rCodeGen.ForSanityChecks(rScript,configForPredictions,target)
    
    designFiles = utility.list_files(args.s)

    for designFile in designFiles:
        print "Generating r code for " + designFile
        rScript.write('\n\nprint ("Running r code for ' + designFile + '")\n')
        config = ConfigObj(designFile)
        configForPredictions = ConfigObj(designFile)
        #--------------MODEL--------------------
        for target in config['target']:
            feature_keys = configForPredictions['features-'+target].keys()
            features = configForPredictions['features-'+target]
            for key in feature_keys:
                new_key = key + "P"
                features[new_key] = features[key]
                del features[key]
            lModelGeneratedAfterTraining = os.path.dirname(designFile) + '/' + algo + target + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
            '-dt.' + args.dt + '-targetClass.' + args.targetClass + "-wt." + args.wt+ attribute.generateExtension()  + '.model'
            if os.path.isfile(lModelGeneratedAfterTraining)and ( args.skipM.lower() == "yes" ):
                print "Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No"
            else:
                rCodeGen.ToCreateDataFrameForTraining(rScript,config,target)
                rCodeGen.ForTraining(rScript,args,config,target)
                rCodeGen.saveTrainingModel(rScript,args,os.path.dirname(designFile),target)
        
        #--------------Prediction Part--------------------
            predictionFileName = predictionDataDirectoryName + "/" +  args.a + target + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
                                 '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + os.path.basename(os.path.dirname(designFile)) +\
                                 "-wt." + args.wt+ attribute.generateExtension()  +".predictions"
            if not os.path.isfile(predictionFileName) or ( args.skipP.lower() == "no" ):
                rCodeGen.ForPredictions(rScript,configForPredictions,args,designFile,target,4)
            else:
                print "Prediction File " + predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no "
    rScript.write('rm(list=ls())')
    rScript.close()
    print "Finished generating R training program: " + rProgLocation
    os.system("chmod +x "+rProgLocation)
Ejemplo n.º 35
0
Archivo: main.py Proyecto: grmte/ml
    for target in ['buy','sell']:
        lTreeFileName = "/home/vikas/ml/ob/e/nsecur/ABAll_AmBRAmBAll//s/2c/AmBRAmB//glmnet" + target  + "-td.20140821-tTD30-dt.10.tree1"#args.e+"/"+args.a+ target+'-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + args.dt + attribute.generateExtension() +".tree" + args.treeType
        dd.gGlobalTree[target],lVariable = reading_tree.reading_tree(lTreeFileName,args.treeType)
        dd.gTreeVariablesPresent = dd.gTreeVariablesPresent + lVariable
        for entry,exit in zip(lEntryClList,lExitClList):
            if entry not in dd.gFinalCondition[target]:
                dd.gFinalCondition[target][entry] = ''
                dd.gFinalCondition[target][entry] = reading_tree.traverse_tree(1,args.treeType,float("."+entry),dd.gGlobalTree[target],dd.gFinalCondition[target][entry])
                print("Calling tree traversal ",dd.gFinalCondition[target][entry])
            if exit not in dd.gFinalCondition[target]:
                dd.gFinalCondition[target][exit] = ''
                dd.gFinalCondition[target][exit] = reading_tree.traverse_tree(1,args.treeType,float("."+exit),dd.gGlobalTree[target],dd.gFinalCondition[target][exit])
                print("Calling tree traversal " ,dd.gFinalCondition[target][exit])
else:
    for target in ['buy','sell']:
        lTreeFileName = args.e+"/"+args.a+ target+'-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + args.dt + attribute.generateExtension() +".tree" + args.treeType
        dd.gGlobalTree[target],lVariable = reading_tree.reading_tree(lTreeFileName,args.treeType)
        dd.gTreeVariablesPresent = dd.gTreeVariablesPresent + lVariable
        nodes = args.nodes.split(";")
        dd.gFinalCondition[target]['nodes'] = reading_tree.traverse_nodes(args.treeType,nodes,dd.gGlobalTree[target])
    
config = ConfigObj(args.e+"/design1.ini")
    
for variable in dd.gTreeVariablesPresent:
    if variable.lower()=="j":
        predictedBuyValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'buy' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + \
                                args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions"
        dd.gFileObjectsOfVariablesPresent.append(open(predictedBuyValuesFileName,'r'))
    elif variable.lower()=="k":
        predictedSellValuesFileName = lWFDirName+"/p/"+mainExperimentName+"/"+args.a + 'sell' + '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' +\
                                    args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions"
Ejemplo n.º 36
0
    pathAfterE = absPathOfExperimentName[absPathOfExperimentName.index("/nsecur/")+8:]
elif 'nsefut' in absPathOfExperimentName:
    pathAfterE = absPathOfExperimentName[absPathOfExperimentName.index("/nsefut/")+8:]
elif 'nseopt' in absPathOfExperimentName:
    pathAfterE = absPathOfExperimentName[absPathOfExperimentName.index("/nseopt/")+8:]
    
if "/" in pathAfterE:
    mainExperimentName = pathAfterE[:pathAfterE.index("/")]
else:
    mainExperimentName = pathAfterE
    
experimentName = os.path.basename(absPathOfExperimentName)
gTickSize = int(args.tickSize)
gMaxQty = int(args.orderQty)

initialFileName ='TradeOnTarget-d.' + os.path.basename(os.path.abspath(args.d))+ attribute.generateExtension() + "-tq." + args.orderQty + attribute.generateExtension()  + "-dte.7" 
    

g_quantity_adjustment_list_for_sell = {}
g_quantity_adjustment_list_for_buy = {}
gOpenBuyPrice = 0.0
gCloseSellPrice = 0.0
gOpenSellPrice = 0.0
gCloseBuyPrice = 0.0

def functionToReadTargetFileToDictionary(pTargetValuesFile,pTargetValuesDict,pFileHeader):
    lNumberOfLinesInTargetValuesFile = 0
    for line in pTargetValuesFile:
        if pFileHeader == True:
            pFileHeader = False
            continue
Ejemplo n.º 37
0
Archivo: tradeE12.py Proyecto: grmte/ml
def getPredictedValuesIntoDict(pPredictedValuesDict):
    """Fill pPredictedValuesDict with {'buy': ..., 'sell': ...} entries per key.

    Reads two prediction files located under the /wf/ counterpart of args.pd,
    one named after the first key of the design's "target" section and one
    after the second (the code treats them as buy and sell respectively --
    assumes the design lists them in that order; confirm against design.ini).
    Each file is parsed by functionToReadPredictionFileToDictionary; the keys of
    the buy dictionary drive the merge.

    Raises KeyError if a key of the buy dictionary is absent from the sell one.
    """
    dirName = args.pd.replace('/ro/','/wf/')
    config = ConfigObj(args.e+"/design.ini")
    target = config["target"]

    lPredictedBuyValuesDict = dict()
    predictedBuyValuesFileName = dirName+"/p/"+mainExperimentName+"/"+args.a + target.keys()[0] + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
                                 '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions"
    print("Buy Predicted values file : "+ predictedBuyValuesFileName)
    sys.stdout.flush()
    fileHasHeader = True
    # 'with' guarantees the handle is released once the file has been read.
    with open(predictedBuyValuesFileName) as predictedBuyValuesFile:
        numberOfLinesInBuyPredictedValuesFile = functionToReadPredictionFileToDictionary(predictedBuyValuesFile,lPredictedBuyValuesDict,fileHasHeader)
    print("Finished reading the buy predicted values file")    
    print("The number of elements in the buy predicted values dictionary is : " + str(len(lPredictedBuyValuesDict)))
    # A mismatch between the reported line count and the dictionary size is only reported, not fatal.
    if (numberOfLinesInBuyPredictedValuesFile != len(lPredictedBuyValuesDict)):
        print("Number of duplicate time stamps rejected in buy predicted values dictionary = " + str(numberOfLinesInBuyPredictedValuesFile - len(lPredictedBuyValuesDict)))
        #os._exit(-1)
    sys.stdout.flush()

    lPredictedSellValuesDict = dict()
    predictedSellValuesFileName = dirName+"/p/"+mainExperimentName+"/"+args.a + target.keys()[1] + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
                                 '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName +  "-wt." + args.wt + attribute.generateExtension()+ ".predictions"
    print("Sell Predicted values file : "+ predictedSellValuesFileName)
    sys.stdout.flush()
    fileHasHeader = True
    with open(predictedSellValuesFileName) as predictedSellValuesFile:
        numberOfLinesInSellPredictedValuesFile = functionToReadPredictionFileToDictionary(predictedSellValuesFile,lPredictedSellValuesDict,fileHasHeader)
    print("Finished reading the sell predicted values file")    
    print("The number of elements in the sell predicted values dictionary is : " + str(len(lPredictedSellValuesDict)))
    if (numberOfLinesInSellPredictedValuesFile != len(lPredictedSellValuesDict)):
        print("Number of duplicate timestamps rejected in sell predicted values dictionary = " + str(numberOfLinesInSellPredictedValuesFile - len(lPredictedSellValuesDict)))
        #os._exit(-1)
    sys.stdout.flush()
#-----------------Getting predicted values into dictionary -------------------------------------
    for elements in lPredictedBuyValuesDict:
        pPredictedValuesDict[elements] = {}
        pPredictedValuesDict[elements]['buy'] = lPredictedBuyValuesDict[elements]
        pPredictedValuesDict[elements]['sell'] = lPredictedSellValuesDict[elements] 
Ejemplo n.º 38
0
if "/" in pathAfterE:
    mainExperimentName = pathAfterE[:pathAfterE.index("/")]
else:
    mainExperimentName = pathAfterE

experimentName = os.path.basename(absPathOfExperimentName)
gTickSize = int(args.tickSize)
gMaxQty = int(args.orderQty)

startTimeList = args.startTime.split(";")
endTimeList = args.endTime.split(";")
initialFileName = []
for indexOfCL in range(0, len(startTimeList)):
    lInitialFileName = 'DummyTradeEngine-d.' + os.path.basename(
        os.path.abspath(args.d)
    ) + attribute.generateExtension(
    ) + '-l.' + startTimeList[indexOfCL] + "-" + endTimeList[
        indexOfCL] + "-tq." + args.orderQty + "-tarType" + args.targetType + attribute.generateExtension(
        ) + "-dte.7"
    initialFileName.append(lInitialFileName)

g_quantity_adjustment_list_for_sell = {}
g_quantity_adjustment_list_for_buy = {}


def functionToReadTargetFileToDictionary(pTargetValuesFile, pTargetValuesDict,
                                         pFileHeader):
    lNumberOfLinesInTargetValuesFile = 0
    for line in pTargetValuesFile:
        if pFileHeader == True:
            pFileHeader = False
            continue
Ejemplo n.º 39
0
Archivo: tradeE3.py Proyecto: grmte/ml
def getPredictedValuesIntoDict(pPredictedValuesDict):
    """Read the prediction file for the first design target into pPredictedValuesDict.

    The file lives under the /wf/ counterpart of args.pd.  Its first line is
    skipped as a header; every other line is split on ',' into at most three
    fields, with field 1 used as the float key and field 2 as the float value.
    A missing or non-numeric value is stored as 0.

    Terminates the process with os._exit(-1) when the number of data lines
    differs from the number of distinct keys stored.
    """
    dirName = args.pd.replace('/ro/','/wf/')
    config = ConfigObj(args.e+"/design.ini")
    target = config["target"]
    predictedValuesFileName = dirName+"/p/"+mainExperimentName+"/"+args.a + target.keys()[0] + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
                                 '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName +  "-wt." + args.wt+ attribute.generateExtension() + ".predictions"
    print("Predicted values file : "+ predictedValuesFileName)
    sys.stdout.flush()
    numberOfLinesInPredictedValuesFile = 0
    # 'with' closes the file once reading is done, including on a parse error.
    with open(predictedValuesFileName) as predictedValuesFile:
        # Discard the header line (no-op on an empty file).
        next(predictedValuesFile, None)
        for line in predictedValuesFile:
            line=line.rstrip('\n')
            splitLine = line.split(',',2)
            timeStamp = float(splitLine[1])
            try:#TODO: remove this and then run the code to identify errors.
                predictedProb = float(splitLine[2])
            except (IndexError, ValueError):
                # Only a missing or unparsable value falls back to 0; anything
                # else (e.g. KeyboardInterrupt) now propagates.
                predictedProb = 0
            pPredictedValuesDict[timeStamp] = predictedProb
            numberOfLinesInPredictedValuesFile += 1
    print("Finished reading the predicted values file")    
    print("The number of elements in the predicted values dictionary is : " + str(len(pPredictedValuesDict)))
    if (numberOfLinesInPredictedValuesFile != len(pPredictedValuesDict)):
        print("Number of duplicate timestamps rejected = " + str(numberOfLinesInPredictedValuesFile - len(pPredictedValuesDict)))
        os._exit(-1)
    sys.stdout.flush()
Ejemplo n.º 40
0
    else:
        mainExperimentName = pathAfterE

    mainExperimentNameList.append(mainExperimentName)    
    experimentName = os.path.basename(absPathOfExperimentName)
    sys.path.append("./src/")
    sys.path.append("./ob/generators/")
    
    config = ConfigObj(experiment+"/design.ini")
    featureTargetFilePath = args.pd.replace('ro', 'wf')
    
    
    for feature in config["features-buy"]:
        lName = config["features-buy"][feature].replace('(','').replace(')','')
        if lName not in featureNames:
            lFeatureFile = featureTargetFilePath + "/f/" + lName+ attribute.generateExtension() + ".feature"
            featureFP = open(lFeatureFile, "rb")
            featureFpList.append(featureFP)
            featureNames.append(lName)
        
    for feature in config["features-sell"]:
        lName = config["features-sell"][feature].replace('(','').replace(')','')
        if lName not in featureNames:
            lFeatureFile = featureTargetFilePath + "/f/" + lName + attribute.generateExtension() + ".feature"
            featureFP = open(lFeatureFile, "rb")
            featureFpList.append(featureFP)
            featureNames.append(lName)
            
    dirName = args.pd.replace('/ro/','/wf/')
    targetSet = config['target']
    
Ejemplo n.º 41
0
    args.dt, "-iT", args.iT, "-oT", args.oT, "-sP", args.sP
], args.run, args.sequence)
if args.sequence == "dp":
    print dp.printGroupStatus()

# ---- Run one correlation R program per working-file directory ----
# The directory list is derived from the /wf/ counterpart of the training directory.
allWorkingFileDirectories = attribute.getListOfTrainingDirectoriesNames(
    int(args.nDays), args.td.replace('/ro/', '/wf/'), args.iT)
allWorkingFileDirectoriesString = ";".join(allWorkingFileDirectories)
lCorrCommandList = []
if args.sequence == "dp":
    # "dp" mode: collect one [script, '-d', directory] command per day and
    # submit the whole list in a single call.
    for l_training_day in allWorkingFileDirectories:
        lDate = os.path.basename(os.path.abspath(l_training_day))
        lFileName = l_exp_dir + "/corr-date-" + lDate + "-td." + os.path.basename(
            os.path.abspath(args.td)
        ) + "-dt." + args.dt + attribute.generateExtension() + ".r"
        lCorrCommandList.append([lFileName, '-d', l_training_day])
    utility.runListOfCommandsWithMaxUtlilizationOfWorkers(
        lCorrCommandList, args, "Day-wise Correlation", int(args.nComputers))
else:

    # Any other mode: run each day's script through utility.runCommand, one call per directory.
    def scriptWrapperForDayWiseCorrelation(pTrainingDay):
        # Builds the same script name as the "dp" branch above and runs it for one directory.
        lDate = os.path.basename(os.path.abspath(pTrainingDay))
        lFileName = l_exp_dir + "/corr-date-" + lDate + "-td." + os.path.basename(
            os.path.abspath(args.td)
        ) + "-dt." + args.dt + attribute.generateExtension() + ".r"
        utility.runCommand([lFileName, '-d', pTrainingDay], args.run,
                           args.sequence)

    # NOTE(review): this relies on map() being eager (Python 2); under Python 3
    # map() is lazy and the wrapper would never be called -- confirm interpreter.
    results = map(scriptWrapperForDayWiseCorrelation,
                  allWorkingFileDirectories)
Ejemplo n.º 42
0
 def scriptWrapperForDayWiseCorrelation(pTrainingDay):
     """Run the day-wise correlation R script that belongs to pTrainingDay.

     The script name is built from the experiment directory, the base name of
     pTrainingDay, the base name of args.td, args.dt and the instrument
     extension; it is then handed to utility.runCommand with '-d pTrainingDay'.
     """
     lDayName = os.path.basename(os.path.abspath(pTrainingDay))
     lScriptPath = "".join([
         l_exp_dir, "/corr-date-", lDayName,
         "-td.", os.path.basename(os.path.abspath(args.td)),
         "-dt.", args.dt,
         attribute.generateExtension(), ".r",
     ])
     lCommand = [lScriptPath, '-d', pTrainingDay]
     utility.runCommand(lCommand, args.run, args.sequence)
Ejemplo n.º 43
0
def main():
    """Generate the executable R prediction script for one experiment.

    Reads ``<s>/design.ini`` (``-s`` falls back to ``-e``) and writes
    ``predict<algo>-td.<td>-dt.<dt>-pd.<pd>-wt.<wt><ext>[double].r`` into
    ``os.path.dirname(<s>)``.  Unless every target already has its
    ``.predictions`` file and ``-skipP`` is "yes", each target gets the
    feature-reading and sanity-check sections, and the prediction section is
    emitted for targets whose file is missing (or for all of them with
    ``-skipP no``).  The generated script is finally made executable.
    """
    import subprocess  # only needed for the final chmod

    parser = argparse.ArgumentParser(description='Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/')
    parser.add_argument('-e', required=True,help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-pd', required=True,help='Prediction directory')
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="exp/default")
    parser.add_argument('-targetClass',required=True,help="For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipP',required=False,help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-s',required=False,help="Experiment sub folders")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    parser.add_argument('-double',required=False,help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    if args.skipP is None:
        args.skipP = "yes"
    if args.s is None:
        args.s = args.e

    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.s+"/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.s)
    # '-a' is a required option, so it is always present at this point.
    algo = args.a
    lTrainingDayName = os.path.basename(os.path.abspath(args.td))
    # Artefacts of a double-trained model carry "double" before the file type.
    lDoubleTag = "double" if args.double else ""

    rProgName = "predict" + algo + "-td." + lTrainingDayName + "-dt." + args.dt + "-pd." + os.path.basename(os.path.abspath(args.pd)) \
                + "-wt." + args.wt + attribute.generateExtension() + lDoubleTag + ".r"
    rProgLocation = dirName+'/'+rProgName

    predictDataDirectoryName = args.pd.replace('/ro/','/wf/')
    predictDataDirectoryName = predictDataDirectoryName + "/p/" + os.path.basename(os.path.dirname(args.e)) + "/"

    def predictionFileNameFor(pTarget):
        """Return the .predictions file path for one target of the design."""
        return predictDataDirectoryName + "/" + args.a + pTarget + '-td.' + lTrainingDayName \
            + '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + os.path.basename(os.path.dirname(args.s)) \
            + "-wt." + args.wt + attribute.generateExtension() + lDoubleTag + ".predictions"

    def modelFileNameFor(pTarget):
        """Return the model file path reported for one target.

        Includes attribute.generateExtension(), like the model name used by
        the training side (rCodeGen.saveTrainingModel).
        """
        return args.s + '/' + args.a + pTarget + '-td.' + lTrainingDayName + '-dt.' + args.dt \
            + '-targetClass.' + args.targetClass + "-wt." + args.wt + attribute.generateExtension() + lDoubleTag + '.model'

    with open(rProgLocation,'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if not os.path.exists(predictDataDirectoryName):
            try:
                os.makedirs(predictDataDirectoryName)
            except OSError:
                # Another process may have created the directory in between.
                if not os.path.isdir(predictDataDirectoryName):
                    raise
        if args.a in ('glmnet', 'randomForest'):
            rScript.write('require (' + args.a + ') \n')
        rCodeGen.ForSetUpChecks(rScript)

        # Nothing more is generated when every prediction file already exists
        # and the caller asked to skip existing ones.
        lAllFilePresent = all(
            os.path.isfile(predictionFileNameFor(target)) and args.skipP.lower() == "yes"
            for target in config['target'])

        if not lAllFilePresent:
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript,config,target)
                rCodeGen.ForSanityChecks(rScript,config,target)
                predictionFileName = predictionFileNameFor(target)

                if os.path.isfile(predictionFileName) and args.skipP.lower() != "no":
                    print(predictionFileName + " Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")
                    continue

                if args.double:
                    rCodeGen.ForPredictions(rScript,config,args,args.s,target,2,"double")
                else:
                    rCodeGen.ForPredictions(rScript,config,args,args.s,target)
                print(modelFileNameFor(target))

    print("Finished generating R prediction program: " + rProgLocation)
    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed to chmod untouched.
    subprocess.call(["chmod", "+x", rProgLocation])
Ejemplo n.º 44
0
 def scriptWrapperForPredictProgramRun(predictionDirAfterLastTD):
     """Run the generated predict R script for one prediction directory.

     The script is looked up in lExperimentFolderName and is started with
     '-d <directory>', where the directory is predictionDirAfterLastTD with
     '/ro/' replaced by '/wf/'.
     """
     lTrainingDayName = os.path.basename(os.path.abspath(args.td))
     lPredictionDayName = os.path.basename(
         os.path.abspath(predictionDirAfterLastTD))
     lScriptPath = "".join([
         lExperimentFolderName, "/predict", args.a,
         "-td.", lTrainingDayName,
         "-dt.", args.dt,
         "-pd.", lPredictionDayName,
         "-wt.", args.wt,
         attribute.generateExtension(),
         ".r",
     ])
     lWorkingDir = predictionDirAfterLastTD.replace('/ro/', '/wf/')
     lCommand = [lScriptPath, "-d", lWorkingDir]
     utility.runCommand(lCommand, args.run, args.sequence)
Ejemplo n.º 45
0
def main():
    """Generate one executable day-wise correlation R script per training day.

    Reads ``<e>/design.ini`` and, for every directory returned by
    ``attribute.getListOfTrainingDirectoriesNames(int(dt), td, iT)``, writes
    ``corr-date-<day>-td.<td>-dt.<dt><ext>.r`` under ``os.path.dirname(<e>)``.
    Each script receives the set-up checks, the target-file reading section
    and, per target, the correlation section that is given the matching
    ``correlation-coef-date-...coef`` file name.
    """
    import subprocess  # only needed for the chmod of each generated script

    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is :- src/corrRGenForE.py -e ob/e/nsefut/CorExpHINDALCO/ -td ob/data/ro/nsefut/20141017/ -dt 10 -iT HINDALCO -sP -1 -oT 0')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    print("Using the experiment folder " + args.e)
    print(args.e+"/design.ini")
    config = ConfigObj(args.e+"/design.ini")

    print("The config parameters that I am working with are")
    print(config)
    dirName=os.path.dirname(args.e)+"/"
    # Same for every generated script, so computed once.
    lTrainingDayName = os.path.basename(os.path.abspath(args.td))
    trainingDaysDirectory = attribute.getListOfTrainingDirectoriesNames( int(args.dt) , args.td ,args.iT)
    for l_trainingday in trainingDaysDirectory:
        lDate = os.path.basename(os.path.abspath(l_trainingday))
        rProgName = "corr-date-"+ lDate +"-td." + lTrainingDayName + "-dt." + args.dt + attribute.generateExtension() +".r"
        rProgLocation = dirName+'/'+rProgName
        lCorrelationFileName = dirName + '/correlation-coef-date-'+ lDate + '-td.' + lTrainingDayName + '-dt.' + args.dt + attribute.generateExtension() + ".coef"
        # The context manager closes the script even if a generator call raises.
        with open(rProgLocation,'w') as rScript:
            rScript.write('#!/usr/bin/Rscript \n')
            rCodeGen.ForSetUpChecks(rScript)
            rCodeGen.ToReadTargetFile(rScript,config)
            for target in config['target']:
                rCodeGen.ToFindCorrelationDatewiseAndPrintingToFile(rScript,config,target,lCorrelationFileName)
        # Argument list instead of a shell string, so the path is never
        # interpreted by a shell.
        subprocess.call(["chmod", "+x", rProgLocation])
Ejemplo n.º 46
0
def getPredictedValuesIntoDict(pPredictedValuesDict):
    """Fill pPredictedValuesDict with the buy and sell predictions per key.

    The buy file belongs to the first key of the design's "target" section
    and the sell file to the second one.  Both are parsed with
    functionToReadPredictionFileToDictionary (header line expected).  For
    every key found in the buy file the result holds
    ``{'buy': <buy value>, 'sell': <sell value>}``.

    pPredictedValuesDict is modified in place; nothing is returned.

    Raises KeyError when a key of the buy file is absent from the sell file.
    NOTE(review): the keys are presumably time stamps (see the duplicate
    messages below) - confirm against the reader function.
    """
    dirName = args.pd.replace('/ro/','/wf/')
    config = ConfigObj(args.e+"/design.ini")
    target = config["target"]
    lTrainingDayName = os.path.basename(os.path.abspath(args.td))
    fileHasHeader = True

    lPredictedBuyValuesDict = dict()
    predictedBuyValuesFileName = dirName+"/p/"+mainExperimentName+"/"+args.a + target.keys()[0] + '-td.' + lTrainingDayName + \
                                 '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + ".predictions"
    print("Buy Predicted values file : "+ predictedBuyValuesFileName)
    sys.stdout.flush()
    # The context manager closes the file as soon as it has been read.
    with open(predictedBuyValuesFileName) as predictedBuyValuesFile:
        numberOfLinesInBuyPredictedValuesFile = functionToReadPredictionFileToDictionary(predictedBuyValuesFile,lPredictedBuyValuesDict,fileHasHeader)
    print("Finished reading the buy predicted values file")
    print("The number of elements in the buy predicted values dictionary is : " + str(len(lPredictedBuyValuesDict)))
    # Duplicates are only reported; they do not abort the run.
    if (numberOfLinesInBuyPredictedValuesFile != len(lPredictedBuyValuesDict)):
        print("Number of duplicate time stamps rejected in buy predicted values dictionary = " + str(numberOfLinesInBuyPredictedValuesFile - len(lPredictedBuyValuesDict)))
    sys.stdout.flush()

    lPredictedSellValuesDict = dict()
    predictedSellValuesFileName = dirName+"/p/"+mainExperimentName+"/"+args.a + target.keys()[1] + '-td.' + lTrainingDayName + \
                                 '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName +  "-wt." + args.wt + attribute.generateExtension()+ ".predictions"
    print("Sell Predicted values file : "+ predictedSellValuesFileName)
    sys.stdout.flush()
    with open(predictedSellValuesFileName) as predictedSellValuesFile:
        numberOfLinesInSellPredictedValuesFile = functionToReadPredictionFileToDictionary(predictedSellValuesFile,lPredictedSellValuesDict,fileHasHeader)
    print("Finished reading the sell predicted values file")
    print("The number of elements in the sell predicted values dictionary is : " + str(len(lPredictedSellValuesDict)))
    if (numberOfLinesInSellPredictedValuesFile != len(lPredictedSellValuesDict)):
        print("Number of duplicate timestamps rejected in sell predicted values dictionary = " + str(numberOfLinesInSellPredictedValuesFile - len(lPredictedSellValuesDict)))
    sys.stdout.flush()
#-----------------Getting predicted values into dictionary -------------------------------------
    for lKey in lPredictedBuyValuesDict.keys():
        pPredictedValuesDict[lKey] = {}
        pPredictedValuesDict[lKey]['buy'] = lPredictedBuyValuesDict[lKey]
        pPredictedValuesDict[lKey]['sell'] = lPredictedSellValuesDict[lKey]
Ejemplo n.º 47
0
    pLPerLotLong=(averageCloseSellPrice - averageOpenBuyPrice)* 1000
    print("1 lot has 1000 qty's so P/L Short per lot is: " + str(pLPerLotShort), file = outputFile)
    print("1 lot has 1000 qty's so P/L Long per lot is: " + str(pLPerLotLong), file = outputFile)
    print("P/L for Short trading 10 lots is: " + str(pLPerLotShort * 10), file = outputFile)
    print("P/L for Long trading 10 lots is: " + str(pLPerLotLong * 10), file = outputFile)


if __name__ == "__main__":
    tStart = datetime.now()
    dirName = args.pd.replace('/ro/','/rs/')
    checkAllFilesAreExistOrNot = 'false'

    lWFDirName = args.pd.replace('/ro/','/wf/')
    # Prediction files of a double-trained model end in "double.predictions",
    # all others in ".predictions"; the rest of the name is identical.
    lPredictionsSuffix = "double.predictions" if args.double else ".predictions"
    lPredictionDir = lWFDirName + "/p/" + mainExperimentName + "/"
    lTrainingTag = '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + \
        args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt

    predictedBuyValuesFileName = lPredictionDir + args.a + 'buy' + lTrainingTag + attribute.generateExtension() + lPredictionsSuffix

    predictedSellValuesFileName = lPredictionDir + args.a + 'sell' + lTrainingTag + attribute.generateExtension() + lPredictionsSuffix

    # Entry and exit confidence levels are paired by position, so both
    # ';'-separated lists must have the same length.
    lEntryClList = args.entryCL.split(";")
    lExitClList = args.exitCL.split(";")
    if len(lEntryClList)!= len(lExitClList):
        print("Len of entry and exit list does not match. Entry List length = " , len(lEntryClList) , " and ExitCL List length = " , len(lExitClList))
        os._exit(-1)
Ejemplo n.º 48
0
def main():
    """Generate the executable R training script for one experiment.

    Reads ``<e>/design.ini`` and writes
    ``train<algo>-td.<td>-dt.<dt>-wt.<wt><ext>[double].r`` under
    ``os.path.dirname(<e>)``.  Unless every target already has its model file
    and ``-skipM`` is "yes", the script gets the target-reading and weight
    vector sections, and for every target whose model is missing (or for all
    of them when ``-skipM`` is not "yes") the data-frame, training and
    model-saving sections.  With ``-double`` an existing single-trained model
    is loaded instead of being trained again.  The script is finally made
    executable.
    """
    import subprocess  # only needed for the final chmod

    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-targetClass',required=True,help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM',required=False,help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    parser.add_argument('-double',required=False,help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    if args.skipM is None:
        args.skipM = "yes"

    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e+"/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    dirName=os.path.dirname(args.e)+"/"

    algo = rCodeGen.getAlgoName(args)
    lTrainingDayName = os.path.basename(os.path.abspath(args.td))

    rProgName = "train" + algo + "-td." + lTrainingDayName + "-dt." + args.dt + "-wt." + args.wt + attribute.generateExtension() \
                + ("double.r" if args.double else ".r")
    rProgLocation = dirName+'/'+rProgName

    def modelFileNameFor(pTarget, pDoubleTrained):
        """Return the model path of pTarget (double- or single-trained)."""
        lSuffix = 'double.model' if pDoubleTrained else '.model'
        return dirName + '/' + algo + pTarget + '-td.' + lTrainingDayName \
            + '-dt.' + args.dt + '-targetClass.' + args.targetClass + "-wt." + args.wt + attribute.generateExtension() + lSuffix

    def modelCanBeSkipped(pTarget):
        """True when pTarget's model exists and existing models are kept."""
        return os.path.isfile(modelFileNameFor(pTarget, args.double)) and args.skipM.lower() == "yes"

    # The context manager closes the script even if a generator call raises.
    with open(rProgLocation,'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if algo in ('glmnet', 'randomForest', 'mda'):
            rScript.write('require (' + algo + ') \n')
        rCodeGen.ForSetUpChecks(rScript)

        lAllFilePresent = all(modelCanBeSkipped(target) for target in config['target'])
        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript,config)
            rCodeGen.ForWtVectorGeneration(rScript,args.wt.lower())

            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript,config,target)
                rCodeGen.ForSanityChecks(rScript,config,target)
                lModelGeneratedAfterTraining = modelFileNameFor(target, args.double)
                if modelCanBeSkipped(target):
                    print("Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                    continue

                rCodeGen.ToCreateDataFrameForTraining(rScript,config,target)
                if args.double:
                    # Reuse the single-trained model as the first pass when
                    # it is already on disk; otherwise train it in the script.
                    lTempModelName = modelFileNameFor(target, False)
                    if os.path.isfile(lTempModelName):
                        rCodeGen.ForLoadingModel(rScript,args,dirName,target,config)
                    else:
                        rCodeGen.ForTraining(rScript,args,config,target)
                    rCodeGen.forPreparingWtVectorForDoubleTraining(rScript,args,target)
                    rCodeGen.saveTrainingModel(rScript,args,dirName,target,"double")
                else:
                    rCodeGen.ForTraining(rScript,args,config,target)
                    rCodeGen.saveTrainingModel(rScript,args,dirName,target)

    print("Finished generating R training program: " + rProgLocation)
    # Argument list instead of a shell string, so the path is never
    # interpreted by a shell.
    subprocess.call(["chmod", "+x", rProgLocation])
Ejemplo n.º 49
0
                                         index("/nsefut/") + 8:]
elif 'nseopt' in absPathOfExperimentName:
    pathAfterE = absPathOfExperimentName[absPathOfExperimentName.
                                         index("/nseopt/") + 8:]

if "/" in pathAfterE:
    mainExperimentName = pathAfterE[:pathAfterE.index("/")]
else:
    mainExperimentName = pathAfterE

experimentName = os.path.basename(absPathOfExperimentName)
gTickSize = int(args.tickSize)
gMaxQty = int(args.orderQty)

initialFileName = 'TradeOnTarget-d.' + os.path.basename(os.path.abspath(
    args.d)) + attribute.generateExtension(
    ) + "-tq." + args.orderQty + attribute.generateExtension() + "-dte.7"

g_quantity_adjustment_list_for_sell = {}
g_quantity_adjustment_list_for_buy = {}
gOpenBuyPrice = 0.0
gCloseSellPrice = 0.0
gOpenSellPrice = 0.0
gCloseBuyPrice = 0.0


def functionToReadTargetFileToDictionary(pTargetValuesFile, pTargetValuesDict,
                                         pFileHeader):
    lNumberOfLinesInTargetValuesFile = 0
    for line in pTargetValuesFile:
        if pFileHeader == True:
            pFileHeader = False
Ejemplo n.º 50
0
    print("1 lot has 1000 qty's so P/L Long per lot is: " + str(pLPerLotLong),
          file=outputFile)
    print("P/L for Short trading 10 lots is: " + str(pLPerLotShort * 10),
          file=outputFile)
    print("P/L for Long trading 10 lots is: " + str(pLPerLotLong * 10),
          file=outputFile)


if __name__ == "__main__":
    tStart = datetime.now()
    dirName = args.pd.replace('/ro/', '/rs/')
    checkAllFilesAreExistOrNot = 'false'

    lWFDirName = args.pd.replace('/ro/', '/wf/')
    # Buy and sell prediction files differ only in the side tag and in the
    # experiment they come from.
    lPredictionDir = lWFDirName + "/p/" + mainExperimentName + "/"
    lTrainingTag = '-td.' + os.path.basename(os.path.abspath(args.td)) + '-dt.' + \
        args.dt + '-targetClass.' + args.targetClass + '-f.'

    predictedBuyValuesFileName = lPredictionDir + args.a + 'buy' + lTrainingTag + \
        buyExperimentName + "-wt." + args.wt + attribute.generateExtension() + ".predictions"

    predictedSellValuesFileName = lPredictionDir + args.a + 'sell' + lTrainingTag + \
        sellExperimentName + "-wt." + args.wt + attribute.generateExtension() + ".predictions"

    # Entry and exit confidence levels are paired by position, so both
    # ';'-separated lists must have the same length.
    lEntryClList = args.entryCL.split(";")
    lExitClList = args.exitCL.split(";")
    if len(lEntryClList) != len(lExitClList):
        print("Len of entry and exit list does not match. Entry List length = ",
              len(lEntryClList), " and ExitCL List length = ",
              len(lExitClList))
        os._exit(-1)
    lengthOfList = len(lEntryClList)

    lMinOfExitCl = 9999.000
    fileNameList = []
Ejemplo n.º 51
0
        for designFile in designFiles:
            lExperimentFolderName = os.path.dirname(designFile) + "/"
            experimentFolderDirectory.append(lExperimentFolderName)
        indexOfFeatures += 1

else:
    experimentFolderDirectory.append(args.e)

print "Experiment Folder Lsit ", experimentFolderDirectory
#==========Running the model in serial mode and rest thing in serial , lp or dp mode as given
for lExperimentFolderName in experimentFolderDirectory:
    #    utility.runCommand(["mRGenForE.py","-e",lExperimentFolderName,"-a",args.a,"-targetClass",args.targetClass,"-skipM",args.skipM,"-td",args.td, "-dt" , \
    #                                 args.dt , '-wt' , args.wt,"-iT",args.iT,"-oT",args.oT,"-sP",args.sP ] , args.run , "serial")
    scriptName = lExperimentFolderName + "/train" + args.a + "-td." + os.path.basename(
        os.path.abspath(args.td)
    ) + "-dt." + args.dt + "-wt." + args.wt + attribute.generateExtension(
    ) + ".r"
    trainingDataListString = ";".join(trainingDaysDirectory).replace(
        "/ro/", "/wf/")

    #    utility.runCommand([scriptName,"-d",trainingDataListString] , args.run , "serial")

    if args.sequence == "dp":
        #=========Putting all command in alist-========
        lRCodeGenCommandList = []
        lPGenRCodeList = []
        lTradingCommandList = []
        for i in range(len(predictionDaysDirectory)):
            predictionDirAfterLastTD = predictionDaysDirectory[i]
            lRCodeGenCommandList.append((["pRGenForE.py","-e",args.e,"-s",lExperimentFolderName,"-a",args.a,"-skipP",args.skipP,"-td",args.td , "-pd" , predictionDirAfterLastTD , "-dt" , args.dt ,\
                             "-targetClass" , args.targetClass , '-wt' , args.wt ,"-iT",args.iT,"-oT",args.oT,"-sP",args.sP]))
            scriptName=lExperimentFolderName+"/predict" + args.a + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt +"-pd."  +\
Ejemplo n.º 52
0
def main():
    """Generate the executable R prediction script for one experiment.

    Reads ``<s>/design.ini`` (``-s`` falls back to ``-e``) and writes
    ``predict<algo>-td.<td>-dt.<dt>-pd.<pd>-wt.<wt><ext>[double].r`` into
    ``os.path.dirname(<s>)``.  Unless every target already has its
    ``.predictions`` file and ``-skipP`` is "yes", each target gets the
    feature-reading and sanity-check sections, and the prediction section is
    emitted for targets whose file is missing (or for all of them with
    ``-skipP no``).  The generated script is finally made executable.
    """
    import subprocess  # only needed for the final chmod

    parser = argparse.ArgumentParser(
        description=
        'Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/'
    )
    parser.add_argument('-e',
                        required=True,
                        help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-pd', required=True, help='Prediction directory')
    parser.add_argument('-td',
                        required=True,
                        help="Day on which it was trained")
    parser.add_argument('-dt',
                        required=True,
                        help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="exp/default")
    parser.add_argument(
        '-targetClass',
        required=True,
        help=
        "For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)"
    )
    parser.add_argument(
        '-skipP',
        required=False,
        help=
        "yes or no , If you want to regenerate already generated algorithm prediction file then make this value No"
    )
    parser.add_argument('-s', required=False, help="Experiment sub folders")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP',
                        required=False,
                        help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-double',
                        required=False,
                        help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipP is None:
        args.skipP = "yes"
    if args.s is None:
        args.s = args.e

    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.s + "/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.s)
    # '-a' is a required option, so it is always present at this point.
    algo = args.a
    lTrainingDayName = os.path.basename(os.path.abspath(args.td))
    # Artefacts of a double-trained model carry "double" before the file type.
    lDoubleTag = "double" if args.double else ""

    rProgName = "predict" + algo + "-td." + lTrainingDayName + "-dt." + args.dt + "-pd." + os.path.basename(os.path.abspath(args.pd)) \
                + "-wt." + args.wt + attribute.generateExtension() + lDoubleTag + ".r"
    rProgLocation = dirName + '/' + rProgName

    predictDataDirectoryName = args.pd.replace('/ro/', '/wf/')
    predictDataDirectoryName = predictDataDirectoryName + "/p/" + os.path.basename(
        os.path.dirname(args.e)) + "/"

    def predictionFileNameFor(pTarget):
        """Return the .predictions file path for one target of the design."""
        return predictDataDirectoryName + "/" + args.a + pTarget + '-td.' + lTrainingDayName \
            + '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + os.path.basename(os.path.dirname(args.s)) \
            + "-wt." + args.wt + attribute.generateExtension() + lDoubleTag + ".predictions"

    def modelFileNameFor(pTarget):
        """Return the model file path reported for one target.

        Includes attribute.generateExtension(), like the model name used by
        the training side (rCodeGen.saveTrainingModel).
        """
        return args.s + '/' + args.a + pTarget + '-td.' + lTrainingDayName + '-dt.' + args.dt \
            + '-targetClass.' + args.targetClass + "-wt." + args.wt + attribute.generateExtension() + lDoubleTag + '.model'

    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if not os.path.exists(predictDataDirectoryName):
            try:
                os.makedirs(predictDataDirectoryName)
            except OSError:
                # Another process may have created the directory in between.
                if not os.path.isdir(predictDataDirectoryName):
                    raise
        if args.a in ('glmnet', 'randomForest'):
            rScript.write('require (' + args.a + ') \n')
        rCodeGen.ForSetUpChecks(rScript)

        # Nothing more is generated when every prediction file already exists
        # and the caller asked to skip existing ones.
        lAllFilePresent = all(
            os.path.isfile(predictionFileNameFor(target))
            and args.skipP.lower() == "yes"
            for target in config['target'])

        if not lAllFilePresent:
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ForSanityChecks(rScript, config, target)
                predictionFileName = predictionFileNameFor(target)

                if os.path.isfile(predictionFileName) and args.skipP.lower() != "no":
                    print(predictionFileName + " Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")
                    continue

                if args.double:
                    rCodeGen.ForPredictions(rScript, config, args, args.s,
                                            target, 2, "double")
                else:
                    rCodeGen.ForPredictions(rScript, config, args, args.s,
                                            target)
                print(modelFileNameFor(target))

    print("Finished generating R prediction program: " + rProgLocation)
    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed to chmod untouched.
    subprocess.call(["chmod", "+x", rProgLocation])
Ejemplo n.º 53
0
    
if "/" in pathAfterE:
    mainExperimentName = pathAfterE[:pathAfterE.index("/")]
else:
    mainExperimentName = pathAfterE
    
experimentName = os.path.basename(absPathOfExperimentName)
gTickSize = int(args.tickSize)
gMaxQty = int(args.orderQty)

totalEntryCL = args.entryCL.split(";")
totalExitCL = args.exitCL.split(";")
initialFileName = []
for indexOfCL in range(0,len(totalEntryCL)):
    lInitialFileName = args.a + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
                   '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + experimentName + "-wt." + args.wt+ attribute.generateExtension() + \
                   '-l.'+totalEntryCL[indexOfCL]+"-"+totalExitCL[indexOfCL] + "-tq." + args.orderQty + "-te.7" 
    initialFileName.append(lInitialFileName)
    

g_quantity_adjustment_list_for_sell = {}
g_quantity_adjustment_list_for_buy = {}

def functionToReadPredictionFileToDictionary(pPredictedValuesFile,pPredictedValuesDict,pFileHeader):
    """Iterate over a comma-separated prediction file, line by line.

    pPredictedValuesFile -- iterable of text lines (e.g. an open file).
    pPredictedValuesDict -- not touched in the visible part of the body;
                            presumably filled from the split lines -- TODO confirm.
    pFileHeader          -- when True, the first line is skipped as a header.

    NOTE(review): only the beginning of this function is visible here; how the
    split fields and the line counter are used cannot be confirmed.
    """
    lNumberOfLinesInPredictedValuesFile = 0
    for line in pPredictedValuesFile:
        # Drop the header once: the flag is cleared so later lines are kept.
        if pFileHeader == True:
            pFileHeader = False
            continue
        # Strip the trailing newline before splitting into comma-separated fields.
        line=line.rstrip('\n')
        splitLine = line.split(',')
Ejemplo n.º 54
0
    else:
        mainExperimentName = pathAfterE

    # Record the top-level experiment name resolved just above.
    mainExperimentNameList.append(mainExperimentName)
    experimentName = os.path.basename(absPathOfExperimentName)
    sys.path.append("./src/")
    sys.path.append("./ob/generators/")

    # Load the design file of the current experiment.
    config = ConfigObj(experiment + "/design.ini")
    # NOTE(review): this replaces every "ro" substring of args.pd, while
    # dirName at the end of this fragment only replaces the "/ro/" path
    # component -- confirm which of the two is intended.
    featureTargetFilePath = args.pd.replace('ro', 'wf')

    # Open one ".feature" file (binary mode) per buy-side feature whose
    # parenthesis-stripped name is not already in featureNames.
    for feature in config["features-buy"]:
        lName = config["features-buy"][feature].replace('(',
                                                        '').replace(')', '')
        if lName not in featureNames:
            lFeatureFile = featureTargetFilePath + "/f/" + lName + attribute.generateExtension(
            ) + ".feature"
            # The handle is kept in featureFpList; it is not closed in this fragment.
            featureFP = open(lFeatureFile, "rb")
            featureFpList.append(featureFP)
            featureNames.append(lName)

    # Same for the sell-side features; names already opened above are skipped.
    for feature in config["features-sell"]:
        lName = config["features-sell"][feature].replace('(',
                                                         '').replace(')', '')
        if lName not in featureNames:
            lFeatureFile = featureTargetFilePath + "/f/" + lName + attribute.generateExtension(
            ) + ".feature"
            # The handle is kept in featureFpList; it is not closed in this fragment.
            featureFP = open(lFeatureFile, "rb")
            featureFpList.append(featureFP)
            featureNames.append(lName)

    # Directory derived from args.pd with its "/ro/" component swapped for "/wf/".
    dirName = args.pd.replace('/ro/', '/wf/')
Ejemplo n.º 55
0
def main():
    """Generate one R training script covering every sub-experiment under -s.

    Parses the command line, reads <-e>/design.ini, and writes an executable
    R script next to the experiment folder (os.path.dirname of -e).  The
    script first loads the targets/features of the main design, then, for
    each design file returned by utility.list_files(-s) and each target in
    it, emits the data-frame, training and model-saving code -- unless the
    model file already exists and -skipM is "yes" (the default).

    The script file is written inside a ``with`` block so the handle is
    closed even when one of the code generators raises.  Messages are printed
    with the single-argument parenthesised form, which produces the same
    output under the Python 2 print statement.
    """
    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-s', required=True,help='Location of the folder containing all the sub experiments')
    parser.add_argument('-targetClass',required=True,help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM',required=False,help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    # Existing models are kept unless the caller explicitly asks otherwise.
    if args.skipM is None:
        args.skipM = "yes"
    skipExistingModels = args.skipM.lower() == "yes"

    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e+"/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e)
    algo = rCodeGen.getAlgoName(args)
    trainingDay = os.path.basename(os.path.abspath(args.td))

    # The trailing "/" makes os.path.dirname(args.s) return the folder itself,
    # so its basename below is the sub-experiment folder name.
    args.s = args.s + "/"
    rProgName = "train" + algo + "-td." + trainingDay + "-dt." + args.dt + "-wt." + args.wt + attribute.generateExtension() + \
                "-For" + os.path.basename(os.path.dirname(args.s)) + "SubE.r"
    rProgLocation = dirName+'/'+rProgName

    with open(rProgLocation,'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')

        # For these algorithms the R package carries the algorithm's name.
        if algo in ('glmnet', 'randomForest', 'mda'):
            rScript.write('require (' + algo + ') \n')

        rCodeGen.ForSetUpChecks(rScript)
        rCodeGen.ToReadTargetFile(rScript,config)
        rCodeGen.ForWtVectorGeneration(rScript,args.wt.lower())
        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript,config,target)
            rCodeGen.ForSanityChecks(rScript,config,target)

        designFiles = utility.list_files(args.s)

        for designFile in designFiles:
            print("Generating r code for " + designFile)
            rScript.write('\n\nprint ("Running r code for ' + designFile + '")\n')
            # From here on ``config`` is the sub-experiment's own design.
            config = ConfigObj(designFile)
            designDir = os.path.dirname(designFile)
            for target in config['target']:
                lModelGeneratedAfterTraining = designDir + '/' + algo + target + '-td.' + trainingDay + \
                    '-dt.' + args.dt + '-targetClass.' + args.targetClass + "-wt." + args.wt + attribute.generateExtension() + '.model'
                if skipExistingModels and os.path.isfile(lModelGeneratedAfterTraining):
                    print("Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                else:
                    rCodeGen.ToCreateDataFrameForTraining(rScript,config,target)
                    rCodeGen.ForTraining(rScript,args,config,target)
                    rCodeGen.saveTrainingModel(rScript,args,designDir,target)
        # Last statement of the generated script: clear the R workspace.
        rScript.write('rm(list=ls())')

    print("Finished generating R training program: " + rProgLocation)
    os.system("chmod +x "+rProgLocation)
Ejemplo n.º 56
0
Archivo: rCodeGen.py Proyecto: grmte/ml
def ToReadFeatureFiles(rScript,config,targetVariable,pUseWhichArgumentForData=2):
    """Write the R "Section3" code that loads every feature of one target.

    rScript                  -- writable object receiving the generated R code.
    config                   -- mapping with a "features-<targetVariable>"
                                section (feature key -> configured name).
    targetVariable           -- target key; appended to each feature key to
                                form the R variable name.
    pUseWhichArgumentForData -- 4 makes the R code split args[4]; any other
                                value makes it split args[2].

    For each feature the generated R loops over the ";"-separated directories,
    loads "<dir>/f/<name><extension>.bin", and row-binds the loaded object
    into <feature><targetVariable>.  The module-level skipRowCode string is
    appended after every get(...) call.
    """
    featureSection = config["features-"+targetVariable]
    rScript.write('\nprint ("Section3: Read feature files") \n')

    argIndex = '4' if pUseWhichArgumentForData == 4 else '2'
    rScript.write('lDirectorySet<-strsplit(args[' + argIndex + '],";",fixed=TRUE,useBytes=FALSE)\n')

    for feature in featureSection:
        configuredName = featureSection[feature]

        # Name of the object inside the .bin file: configured name without
        # square brackets and parentheses.
        rObjectName = configuredName
        for bracket in '[]()':
            rObjectName = rObjectName.replace(bracket,'')

        # File base name: only the parentheses are removed.
        binBaseName = configuredName.replace('(','').replace(')','') + attribute.generateExtension()
        rVariable = feature + targetVariable

        loadStatement = '        load(paste(file,"/f/'+binBaseName+'.bin",sep=""))\n'
        dropLoadedObject = '        rm("' + rObjectName + '")\n'

        generatedLines = (
            'lFlag=FALSE\n',
            'for (file in lDirectorySet[[1]]){\n',
            # First directory: initialise the R variable.
            '    if (!lFlag){\n',
            loadStatement,
            '        '+rVariable+'<-get("'+rObjectName+'")' + skipRowCode + ' \n',
            dropLoadedObject,
            '        lFlag=TRUE\n',
            '    }\n',
            # Subsequent directories: append rows to it.
            '    else {\n',
            loadStatement,
            '        temp<-get("'+rObjectName+ '")' + skipRowCode + '\n',
            dropLoadedObject,
            '        '+rVariable+'<-rbind('+rVariable+',temp)\n',
            '        rm(temp)\n',
            '    }\n',
            '    print ("Reading '+ binBaseName +'.feature' + '") \n',
            '}\n',
        )
        for generatedLine in generatedLines:
            rScript.write(generatedLine)
Ejemplo n.º 57
0
Archivo: rCodeGen.py Proyecto: grmte/ml
def ToFindCorrelationDatewiseAndPrintingToFile(rScript,config,pTargetVariableKey,pFileName):
    """Write the R "Section6" code that dumps correlation sums per feature.

    rScript            -- writable object receiving the generated R code.
    config             -- mapping with a "features-<pTargetVariableKey>"
                          section (feature key -> configured name).
    pTargetVariableKey -- target key; also the R variable whose second column
                          is used as Y in the generated sums.
    pFileName          -- path embedded in the generated cat(..., append=TRUE)
                          calls.

    For every feature the generated R loads the feature from each directory
    in lDirectorySet (not defined by this function's output), computes
    sum(XY), sum(Y^2), sum(X^2), sum(Y), sum(X) and n over column 2, appends
    them to pFileName labelled with the configured feature name, and removes
    the feature variable.  The module-level skipRowCode string is appended
    after every get(...) call.
    """
    featureSection = config["features-"+pTargetVariableKey]
    appendToFile = 'cat(string_intercept,file="'+ pFileName + '",sep="",append=TRUE)\n'

    rScript.write('\nprint ("Section6: To Find Correlation For ' +pTargetVariableKey  +'") \n')
    rScript.write('string_intercept = paste("CorrelationCoeficient Of ","' + pTargetVariableKey + '" , ":- ","\\n",sep="")\n')
    rScript.write(appendToFile)

    # (label suffix, R variable holding the value), in output order.
    reportedStats = (
        ('XY', 'tempXY'),
        ('Y2', 'tempY2'),
        ('X2', 'tempX2'),
        ('Y', 'tempY'),
        ('X', 'tempX'),
        ('n', 'n'),
    )

    for feature in featureSection:
        configuredName = featureSection[feature]

        # Name of the object inside the .bin file: configured name without
        # square brackets and parentheses.
        rObjectName = configuredName
        for bracket in '[]()':
            rObjectName = rObjectName.replace(bracket,'')

        # File base name: only the parentheses are removed.
        binBaseName = configuredName.replace('(','').replace(')','') + attribute.generateExtension()
        rVariable = feature + pTargetVariableKey

        loadStatement = '        load(paste(file,"/f/'+binBaseName+'.bin",sep=""))\n'
        dropLoadedObject = '        rm("' + rObjectName + '")\n'
        targetColumn = pTargetVariableKey + '[,2]'
        featureColumn = rVariable + '[,2]'

        generatedLines = [
            'lFlag=FALSE\n',
            'for (file in lDirectorySet[[1]]){\n',
            # First directory: initialise the R variable.
            '    if (!lFlag){\n',
            loadStatement,
            '        '+rVariable+'<-get("'+rObjectName+'")' + skipRowCode + ' \n',
            dropLoadedObject,
            '        lFlag=TRUE\n',
            '    }\n',
            # Subsequent directories: append rows to it.
            '    else {\n',
            loadStatement,
            '        temp<-get("'+rObjectName+ '")' + skipRowCode + '\n',
            dropLoadedObject,
            '        '+rVariable+'<-rbind('+rVariable+',temp)\n',
            '        rm(temp)\n',
            '    }\n',
            '    print ("Reading '+ binBaseName +'.feature' + '") \n',
            '}\n',
            # Raw sums from which a correlation coefficient can be derived.
            'tempXY <- sum(' + targetColumn + ' * ' + featureColumn + ' )\n',
            'tempY2 <- sum(' + targetColumn + ' ^ 2 )\n',
            'tempX2 <- sum(' + featureColumn + ' ^ 2 )\n',
            'tempY <- sum(' + targetColumn + ' )\n',
            'tempX <- sum(' + featureColumn + ' )\n',
            'n <- length(' + featureColumn + ' )\n',
        ]

        # One paste(...) call spread over several writes; the labels use the
        # configured name as-is (brackets included).
        leadIn = 'string_intercept = paste('
        for suffix, rValue in reportedStats:
            generatedLines.append(leadIn + '"' + configuredName + '_' + suffix + '" ,"=",toString(' + rValue + '),"\\n"')
            leadIn = ','
        generatedLines.append(',sep="")\n')

        generatedLines.append(appendToFile)
        generatedLines.append('rm('+ rVariable + ')\n')

        for generatedLine in generatedLines:
            rScript.write(generatedLine)

    # Blank separator after this target's block in the output file.
    rScript.write('string_intercept = paste("\\n","\\n",sep="")\n')
    rScript.write(appendToFile)
Ejemplo n.º 58
0
    else:
        mainExperimentName = pathAfterE

    mainExperimentNameList.append(mainExperimentName)    
    experimentName = os.path.basename(absPathOfExperimentName)
    sys.path.append("./src/")
    sys.path.append("./ob/generators/")
    
    config = ConfigObj(experiment+"/design.ini")
    featureTargetFilePath = args.pd.replace('ro', 'wf')
    
    
    for feature in config["features-buy"]:
        lName = config["features-buy"][feature].replace('(','').replace(')','')
        if lName not in featureNames:
            lFeatureFile = featureTargetFilePath + "/f/" + lName+ attribute.generateExtension() + ".feature"
            featureFP = open(lFeatureFile, "rb")
            featureFpList.append(featureFP)
            featureNames.append(lName)
        
    for feature in config["features-sell"]:
        lName = config["features-sell"][feature].replace('(','').replace(')','')
        if lName not in featureNames:
            lFeatureFile = featureTargetFilePath + "/f/" + lName + attribute.generateExtension() + ".feature"
            featureFP = open(lFeatureFile, "rb")
            featureFpList.append(featureFP)
            featureNames.append(lName)
            
    dirName = args.pd.replace('/ro/','/wf/')
    targetSet = config['target']
#    for target in targetSet.keys():