Ejemplo n.º 1
0
    def execute(self, arborapi=None):

        print self.name + " executing"
        # setup mongo connection and look through all input collections, copying
        connection = Connection('localhost', 27017)
        if len(self.databaseName) > 0:
            db = connection[self.databaseName]
        else:
            db = connection['arbor']
        outputcoll = db[self.outputInformation.collectionName]
        # clear out the output to prepare for running an algorithm
        outputcoll.drop()

        # check that we have both inputs needed (tree and matrix specs).  If so, then read the
        # dataset spec records
        # from the input collections and use the dataset references to invoke the algorithm, which
        # requires just the references (since the algorithm opens the collections directly).

        if len(self.inputs) == 2:
            treeSpecCollection = db[self.inputs[0].collectionName]
            matrixSpecCollection = db[self.inputs[1].collectionName]
            treequery = dict()
            treequery['key'] = 'project'
            treeProjectName = treeSpecCollection.find(treequery)[0]['value']
            treequery2 = dict()
            treequery2['key'] = 'dataset'
            treeDatasetName = treeSpecCollection.find(treequery2)[0]['value']
            print "treeProject:", treeProjectName, " dataset:", treeDatasetName
            matrixQuery = dict()
            matrixQuery['key'] = 'dataset'
            matrixDatasetName = matrixSpecCollection.find(
                treequery2)[0]['value']
            # all datasets have to be in the same project for the algorithms to look them up
            # so all we need to read is the dataset name for the character matrix, use the
            # rest of the information from the tree dataset
            print "matrix Project:", treeProjectName, " dataset:", matrixDatasetName

            # check that the two needed parameters (character and outputTree) are defined
            # for this workstep instance, attempt processing only if they are defined. If everything
            # is available, run the algorithm on the selected datasets.  We use the pre-allocated version
            # of the

            if ('outputTree' in self.parameters):
                print "dataIntegrator: found outputTree defined as: ", self.parameters[
                    'outputTree']

                algorithms = ArborAlgorithmManager()
                algorithms.setProjectManagerAPI(arborapi)
                algorithms.initAlgorithmLibrary()

                # only attempt to run the analysis if the algorithm subsystem is defined. This relies
                # on the global algorithms definition
                if algorithms != None:
                    print "dataIntegrator: running algorithm"
                    algorithm_result = algorithms.dataIntegrator(
                        self.databaseName, treeProjectName, treeDatasetName,
                        matrixDatasetName, None, self.parameters['outputTree'])
                else:
                    print "dataIntegrator:  couldn't connect with AlgorithmManager instance.. skipping"
            else:
                print "dataIntegrator: Please define an outputtree parameter before running dataIntegrator"
        else:
            print "dataIntegrator: Exactly two inputs (a treeSpec and matrixSpec) are required"

        # a character matrix that is still a data.frame is returned.  May need to use pandas to convert back to
        # python.

        print "dataIntegrator:  character matrix output is not yet generated"

        for i in range(len(algorithm_result)):
            #print i
            record = algorithm_result[i]
            #print record
            #outputcoll.insert(record)

        # rest the filter's modified time and assign it to the output object
        self.outputInformation.modifiedTime = self.modifiedTime = time.time()
        print self.name, " completed fit continuous workstep "
        connection.close()
Ejemplo n.º 2
0
    def execute(self, arborapi=None):

        print self.name+" executing"
        # setup mongo connection and look through all input collections, copying
        connection = Connection('localhost', 27017)
        if len(self.databaseName)>0:
            db = connection[self.databaseName]
        else:
            db = connection['arbor']
        outputcoll = db[self.outputInformation.collectionName]
        # clear out the output to prepare for running an algorithm
        outputcoll.drop()

        # check that we have both inputs needed (tree and matrix specs).  If so, then read the
        # dataset spec records
        # from the input collections and use the dataset references to invoke the algorithm, which
        # requires just the references (since the algorithm opens the collections directly).

        if len(self.inputs) == 2:
            treeSpecCollection = db[self.inputs[0].collectionName]
            matrixSpecCollection = db[self.inputs[1].collectionName]
            treequery = dict()
            treequery['key'] = 'project'
            treeProjectName = treeSpecCollection.find(treequery)[0]['value']
            treequery2 = dict()
            treequery2['key'] = 'dataset'
            treeDatasetName = treeSpecCollection.find(treequery2)[0]['value']
            print "treeProject:",treeProjectName," dataset:",treeDatasetName
            matrixQuery = dict()
            matrixQuery['key'] = 'dataset'
            matrixDatasetName = matrixSpecCollection.find(treequery2)[0]['value']
            # all datasets have to be in the same project for the algorithms to look them up
            # so all we need to read is the dataset name for the character matrix, use the
            # rest of the information from the tree dataset
            print "matrix Project:",treeProjectName," dataset:",matrixDatasetName

            # check that the two needed parameters (character and outputTree) are defined
            # for this workstep instance, attempt processing only if they are defined. If everything
            # is available, run the algorithm on the selected datasets.  We use the pre-allocated version
            # of the

            if  ('outputTree' in self.parameters):
                print "dataIntegrator: found outputTree defined as: ",self.parameters['outputTree']

                algorithms = ArborAlgorithmManager()
                algorithms.setProjectManagerAPI(arborapi)
                algorithms.initAlgorithmLibrary()

                # only attempt to run the analysis if the algorithm subsystem is defined. This relies
                # on the global algorithms definition
                if algorithms != None:
                    print "dataIntegrator: running algorithm"
                    algorithm_result = algorithms.dataIntegrator(self.databaseName,treeProjectName,treeDatasetName, matrixDatasetName,
                          None,self.parameters['outputTree'])
                else:
                    print "dataIntegrator:  couldn't connect with AlgorithmManager instance.. skipping"
            else:
                print "dataIntegrator: Please define an outputtree parameter before running dataIntegrator"
        else:
            print "dataIntegrator: Exactly two inputs (a treeSpec and matrixSpec) are required"

        # a character matrix that is still a data.frame is returned.  May need to use pandas to convert back to
        # python.

        print "dataIntegrator:  character matrix output is not yet generated"

        for i in range(len(algorithm_result)):
            #print i
            record = algorithm_result[i]
            #print record
            #outputcoll.insert(record)

        # rest the filter's modified time and assign it to the output object
        self.outputInformation.modifiedTime = self.modifiedTime = time.time()
        print self.name," completed fit continuous workstep "
        connection.close()
Ejemplo n.º 3
0
    def execute(self, arborapi=None):

        print self.name + " executing"
        # setup mongo connection and look through all input collections, copying
        connection = Connection('localhost', 27017)
        if len(self.databaseName) > 0:
            db = connection[self.databaseName]
        else:
            db = connection['arbor']
        outputcoll = db[self.outputInformation.collectionName]
        # clear out the output to prepare for running an algorithm
        outputcoll.drop()

        # check that we have both inputs needed (tree and matrix specs).  If so, then read the
        # dataset spec records
        # from the input collections and use the dataset references to invoke the algorithm, which
        # requires just the references (since the algorithm opens the collections directly).

        if len(self.inputs) == 2:
            treeSpecCollection = db[self.inputs[0].collectionName]
            matrixSpecCollection = db[self.inputs[1].collectionName]
            treequery = dict()
            treequery['key'] = 'project'
            treeProjectName = treeSpecCollection.find(treequery)[0]['value']
            treequery2 = dict()
            treequery2['key'] = 'dataset'
            treeDatasetName = treeSpecCollection.find(treequery2)[0]['value']
            print "treeProject:", treeProjectName, " dataset:", treeDatasetName
            matrixQuery = dict()
            matrixQuery['key'] = 'dataset'
            matrixDatasetName = matrixSpecCollection.find(
                treequery2)[0]['value']
            # all datasets have to be in the same project for the algorithms to look them up
            # so all we need to read is the dataset name for the character matrix, use the
            # rest of the information from the tree dataset
            print "matrix Project:", treeProjectName, " dataset:", matrixDatasetName

            # check that the two needed parameters (character and outputTree) are defined
            # for this workstep instance, attempt processing only if they are defined. If everything
            # is available, run the algorithm on the selected datasets.  We use the pre-allocated version
            # of the

            if ('character' in self.parameters) and ('outputTree'
                                                     in self.parameters):
                print "fitContinuous: found selected character defined as: ", self.parameters[
                    'character']
                print "fitContinuous: found outputTree defined as: ", self.parameters[
                    'outputTree']

                algorithms = ArborAlgorithmManager()
                algorithms.setProjectManagerAPI(arborapi)
                algorithms.initAlgorithmLibrary()

                # look to see if the user defined model parameters on this workstep.  If so, use
                # them to override the default OU model

                if ('model' in self.parameters):
                    modelToUse = self.parameters['model']
                else:
                    modelToUse = '"OU"'

                # only attempt to run the analysis if the algorithm subsystem is defined. This relies
                # on the global algorithms definition
                if algorithms != None:
                    print "fitContinous: running algorithm"
                    algorithm_result = algorithms.fitContinuous(
                        self.databaseName, treeProjectName, treeDatasetName,
                        matrixDatasetName,
                        self.parameters['selectedCharacter'],
                        self.parameters['outputTree'], modelToUse)
                else:
                    print "fitContinous:  couldn't connect with AlgorithmManager instance.. skipping"
            else:
                print "fitContinuous: Please define both a character parameter and an outputtree parameter before running fitContinuous"
        else:
            print "fitContinuous: Exactly two inputs (a treeSpec and matrixSpec) are required"

        # write  the output collection and indicate the output time changed. A tree is created and some
        # output parameters are returned, put them in the standard key/value form of the table spec:

        for result in algorithm_result:
            record = dict()
            record['key'] = result
            record['value'] = algorithm_result[result]
            outputcoll.insert(record)

        # rest the filter's modified time and assign it to the output object
        self.outputInformation.modifiedTime = self.modifiedTime = time.time()
        print self.name, " completed fit continuous workstep "
        connection.close()
Ejemplo n.º 4
0
    def execute(self, arborapi=None):

        print self.name+" executing"
        # setup mongo connection and look through all input collections, copying
        connection = Connection('localhost', 27017)
        if len(self.databaseName)>0:
            db = connection[self.databaseName]
        else:
            db = connection['arbor']
        outputcoll = db[self.outputInformation.collectionName]
        # clear out the output to prepare for running an algorithm
        outputcoll.drop()

        # check that we have both inputs needed (tree and matrix specs).  If so, then read the
        # dataset spec records
        # from the input collections and use the dataset references to invoke the algorithm, which
        # requires just the references (since the algorithm opens the collections directly).

        if len(self.inputs) == 2:
            treeSpecCollection = db[self.inputs[0].collectionName]
            matrixSpecCollection = db[self.inputs[1].collectionName]
            treequery = dict()
            treequery['key'] = 'project'
            treeProjectName = treeSpecCollection.find(treequery)[0]['value']
            treequery2 = dict()
            treequery2['key'] = 'dataset'
            treeDatasetName = treeSpecCollection.find(treequery2)[0]['value']
            print "treeProject:",treeProjectName," dataset:",treeDatasetName
            matrixQuery = dict()
            matrixQuery['key'] = 'dataset'
            matrixDatasetName = matrixSpecCollection.find(treequery2)[0]['value']
            # all datasets have to be in the same project for the algorithms to look them up
            # so all we need to read is the dataset name for the character matrix, use the
            # rest of the information from the tree dataset
            print "matrix Project:",treeProjectName," dataset:",matrixDatasetName

            # check that the two needed parameters (character and outputTree) are defined
            # for this workstep instance, attempt processing only if they are defined. If everything
            # is available, run the algorithm on the selected datasets.  We use the pre-allocated version
            # of the

            if ('character' in self.parameters) and ('outputTree' in self.parameters):
                print "fitContinuous: found selected character defined as: ",self.parameters['character']
                print "fitContinuous: found outputTree defined as: ",self.parameters['outputTree']

                algorithms = ArborAlgorithmManager()
                algorithms.setProjectManagerAPI(arborapi)
                algorithms.initAlgorithmLibrary()

                # look to see if the user defined model parameters on this workstep.  If so, use
                # them to override the default OU model

                if ('model' in self.parameters):
                    modelToUse = self.parameters['model']
                else:
                    modelToUse = '"OU"'

                # only attempt to run the analysis if the algorithm subsystem is defined. This relies
                # on the global algorithms definition
                if algorithms != None:
                    print "fitContinous: running algorithm"
                    algorithm_result = algorithms.fitContinuous(self.databaseName,treeProjectName,treeDatasetName, matrixDatasetName,
                          self.parameters['selectedCharacter'],self.parameters['outputTree'],modelToUse)
                else:
                    print "fitContinous:  couldn't connect with AlgorithmManager instance.. skipping"
            else:
                print "fitContinuous: Please define both a character parameter and an outputtree parameter before running fitContinuous"
        else:
            print "fitContinuous: Exactly two inputs (a treeSpec and matrixSpec) are required"


        # write  the output collection and indicate the output time changed. A tree is created and some
        # output parameters are returned, put them in the standard key/value form of the table spec:

        for result in algorithm_result:
            record = dict()
            record['key'] = result
            record['value'] = algorithm_result[result]
            outputcoll.insert(record)

        # rest the filter's modified time and assign it to the output object
        self.outputInformation.modifiedTime = self.modifiedTime = time.time()
        print self.name," completed fit continuous workstep "
        connection.close()