Example #1
0
class TestSchema(unittest.TestCase):
    """Unit tests covering construction and lookup behaviour of Schema."""

    def setUp(self):
        # Keyword-form dict keeps insertion order identical to the
        # reference definition (all keys are valid identifiers).
        self.dd = dict(
            application=ComponentItem(category='applications'),
            backend=ComponentItem(category='backends'),
            name=SimpleItem('', comparable=0),
            workdir=SimpleItem(defvalue=None, type='string', transient=1, protected=1, comparable=0),
            status=SimpleItem(defvalue='new', protected=1, comparable=0),
            id=SimpleItem(defvalue=None, typelist=[str], protected=1, comparable=0),
            inputbox=FileItem(defvalue=[], sequence=1),
            outputbox=FileItem(defvalue=[], sequence=1),
            overriden_copyable=SimpleItem(defvalue=None, protected=1, copyable=1),
            plain_copyable=SimpleItem(defvalue=None, copyable=0),
        )
        self.s = Schema(Version(1, 0), self.dd)

    def test_items_list(self):
        """Every item handed to the Schema must be reachable again."""
        self.assertEqual(self.s.allItems(), self.dd.items())
        combined = self.s.componentItems() + self.s.simpleItems()
        self.assertEqual(sorted(combined), sorted(self.dd.items()))

    def test_get_non_existant(self):
        """Fetching a non-existant member must raise GangaAttributeError."""
        with self.assertRaises(GangaAttributeError):
            self.s['b']

    def test_category_name(self):
        """name/category are taken from the attached plugin class."""
        class PClass(object):
            _category = 'jobs'
            _name = 'Job'

        self.s._pluginclass = PClass

        self.assertEqual(self.s.name, 'Job')
        self.assertEqual(self.s.category, 'jobs')

    def test_item_types(self):
        """isA() must report the item classes used at construction."""
        self.assertTrue(self.s['id'].isA(SimpleItem))
        self.assertTrue(self.s['application'].isA(ComponentItem))
        self.assertTrue(self.s['inputbox'].isA(ComponentItem))
        self.assertTrue(self.s['inputbox'].isA(FileItem))

    def test_item_attributes(self):
        """Item properties set at construction are queryable."""
        id_item = self.s['id']
        self.assertTrue(id_item['protected'])
        self.assertFalse(id_item['comparable'])
        self.assertIn(str, id_item['typelist'])

    def test_implied(self):
        """copyable is implied by protected unless explicitly overridden."""
        self.assertTrue(self.s['overriden_copyable']['copyable'])
        self.assertFalse(self.s['plain_copyable']['copyable'])
        self.assertFalse(self.s['id']['copyable'])
        self.assertTrue(self.s['application']['copyable'])
Example #2
0
 def setUp(self):
     """Build the reference schema shared by the test methods."""
     self.dd = {
         'application': ComponentItem(category='applications'),
         'backend': ComponentItem(category='backends'),
         'name': SimpleItem('', comparable=0),
         'workdir': SimpleItem(defvalue=None, type='string', transient=1,
                               protected=1, comparable=0),
         'status': SimpleItem(defvalue='new', protected=1, comparable=0),
         'id': SimpleItem(defvalue=None, typelist=[str], protected=1,
                          comparable=0),
         'inputbox': FileItem(defvalue=[], sequence=1),
         'outputbox': FileItem(defvalue=[], sequence=1),
         'overriden_copyable': SimpleItem(defvalue=None, protected=1,
                                          copyable=1),
         'plain_copyable': SimpleItem(defvalue=None, copyable=0)
     }
     self.s = Schema(Version(1, 0), self.dd)
Example #3
0
class CoreUnit(IUnit):
    """Concrete unit for core (non-experiment-specific) tasks.

    Adds no schema items of its own on top of IUnit.
    """
    # NOTE: the original built this as dict(items() + {}.items()), which
    # breaks on Python 3 where dict views do not support '+'. A plain copy
    # of the parent datadict is equivalent since nothing was being added.
    _schema = Schema(Version(1, 0), dict(IUnit._schema.datadict))

    _category = 'units'
    _name = 'CoreUnit'
    _exportmethods = IUnit._exportmethods + []

    def __init__(self):
        super(CoreUnit, self).__init__()

    def createNewJob(self):
        """Create any jobs required for this unit"""
        j = makeRegisteredJob()

        j.backend = self._getParent().backend.clone()

        # copy from ourselves or the parent transform depending on what's
        # specified
        fields = ['application', 'splitter', 'inputfiles',
                  'inputdata', 'inputsandbox', 'outputfiles', 'postprocessors']

        for f in fields:
            # postprocessors need special handling: deepcopy via the proxy
            # and only when there is actually something to copy.
            if (f == "postprocessors" and len(getattr(self, f).process_objects) > 0):
                j.postprocessors = copy.deepcopy(addProxy(self).postprocessors)
            elif (f != "postprocessors" and getattr(self, f)):
                setattr(j, f, copy.deepcopy(getattr(self, f)))
            elif (f == "postprocessors" and len(getattr(self._getParent(), f).process_objects) > 0):
                # fall back to the parent transform's postprocessors
                j.postprocessors = copy.deepcopy(
                    addProxy(self._getParent()).postprocessors)
            elif (f != "postprocessors" and getattr(self._getParent(), f)):
                # fall back to the parent transform's value for this field
                setattr(j, f, copy.deepcopy(getattr(self._getParent(), f)))

        return j
Example #4
0
class Notifier(IPostProcessor):
    """
    Object which emails a user about job status after jobs have finished. The default behaviour is to email when a job has failed or when a master job has completed.
    Notes:
    * Ganga must be running to send the email, so this object is only really useful if you have a ganga session running in the background (e.g. screen session).
    * Will not send emails about failed subjobs if autoresubmit is on.
    """
    _schema = Schema(
        Version(1, 0), {
            'verbose': SimpleItem(defvalue=False,
                                  doc='Email on subjob completion'),
            'address': SimpleItem(
                defvalue='', doc='Email address', optional=False)
        })
    _category = 'postprocessor'
    _name = 'Notifier'
    order = 3

    def execute(self, job, newstatus):
        """
        Email the user if:
        * job is a master job, or
        * job has failed but does not have auto resubmit, or
        * job has not failed but verbose is set to true
        """
        if len(job.subjobs) or (newstatus == 'failed' and job.do_auto_resubmit
                                is False) or (newstatus != 'failed'
                                              and self.verbose is True):
            return self.email(job, newstatus)
        return True

    def email(self, job, newstatus):
        """
        Send a notification email about *job* entering *newstatus*.

        Raises:
            PostProcessException: if the SMTP transaction fails.
        """
        sender = '*****@*****.**'
        receivers = self.address

        subject = 'Ganga Notification: Job(%s) has %s.' % (job.fqid, newstatus)
        msg_string = """
Dear User,\n
Job(%s) has gone into %s state.\n
Regards,
Ganga\n
PS: This is an automated notification from Ganga, 
if you would like these messages to stop please 
remove the notifier object from future jobs.
        """ % (job.fqid, newstatus)
        msg = email.message_from_string(msg_string)
        msg['Subject'] = subject
        msg['From'] = sender
        msg['To'] = receivers
        string_message = msg.as_string()
        try:
            smtpObj = smtplib.SMTP(config['SMTPHost'])
            try:
                smtpObj.sendmail(sender, receivers, string_message)
            finally:
                # Always close the SMTP connection; the original leaked it.
                smtpObj.quit()
        except smtplib.SMTPException as e:
            raise PostProcessException(str(e))
        return True
Example #5
0
class NonProxiedGangaObject(GangaObject):
    """A test GangaObject that must stay out of the GPI and never be wrapped with a proxy."""
    _name = 'TestGangaObject'
    _category = 'TestGangaObject'
    _schema = Schema(Version(1, 0))
Example #6
0
class DefaultSplitter(ISplitter):
    """
        The DefaultSplitter is assigned to all jobs by default and is intended to provide a single subjob for every job on submit.
        This has been implemented as it potentially simplifies the internal logic in job managment significantly.

        This splitter is not expected to be configurable or to split a dataset based upon any input.
        In order to do this please make use of another splitter.
    """
    _name = "DefaultSplitter"
    ## A dummy value is required to not get a bug in writing the object to an XML repo.
    ## The nature of the problem of writing an empty schema should probably be understood more correctly but is difficult to track down -rcurrie
    _schema = Schema(
        Version(1, 0), {
            'dummy_value':
            SimpleItem(defvalue=1,
                       hidden=1,
                       visitable=0,
                       # The original doc string was copy-pasted from a
                       # files-per-subjob item and was simply wrong.
                       doc='hidden placeholder required to avoid a bug when writing an empty schema to the XML repo',
                       typelist=["int"])
        })

    def split(self, job):
        """Return exactly one subjob for *job*; no real splitting occurs."""
        sj = self.createSubjob(job)
        return [sj]
Example #7
0
class SimpleGangaObject(GangaObject):
    """Minimal hidden test object carrying a single integer attribute ``a``."""
    _schema = Schema(Version(1, 0), {'a': SimpleItem(42, typelist=[int])})
    _hidden = True
    _category = 'TestGangaObject'
    _enable_plugin = True
Example #8
0
class TestGangaObject(GangaObject):
    """Test Ganga Object. Is used to construct test jobs"""
    _schema = Schema(Version(1, 0), {
        'id': SimpleItem('0', doc='ID Needed for tests'),
        'name': SimpleItem(
            '',
            doc='optional label which may be any combination of ASCII characters',
            typelist=['str']),
        'subjobs': ComponentItem('internal', defvalue=[], sequence=1,
                                 protected=1, load_default=0, copyable=0,
                                 optional=1, doc='test subjobs'),
    })
    _name = "TestGangaObject"
    _category = "internal"

    def __init__(self, name='TestObjectName', sj=0):
        """Create the object, optionally populating *sj* nested subjobs."""
        super(TestGangaObject, self).__init__()
        self.name = name
        # Each subjob gets a dotted name derived from its parent's name.
        for idx in range(sj):
            self.subjobs.append(TestGangaObject("%s.%d" % (name, idx)))
Example #9
0
class GridFileIndex(GangaObject):
    '''
    Data object for indexing a file on the grid. 

    @author: Hurng-Chun Lee 
    @contact: [email protected]
    '''

    _schema = Schema(
        Version(1, 0), {
            'id':
            SimpleItem(defvalue='', doc='the main identity of the file'),
            'name':
            SimpleItem(defvalue='', doc='the name of the file'),
            'md5sum':
            SimpleItem(defvalue='', doc='the md5sum of the file'),
            'attributes':
            SimpleItem(defvalue={}, doc='a key:value pairs of file metadata')
        })

    _category = 'GridFileIndex'
    _name = 'GridFileIndex'

    logger = getLogger()

    def __init__(self):
        super(GridFileIndex, self).__init__()

    def __eq__(self, other):
        """Two indices are equal when they share the same file id.

        The original read ``other.id`` unconditionally and raised
        AttributeError when compared against unrelated types.
        """
        if not isinstance(other, GridFileIndex):
            return NotImplemented
        return other.id == self.id

    def __hash__(self):
        # Defining __eq__ alone makes the class unhashable on Python 3;
        # hash on the same identity that equality uses.
        return hash(self.id)
Example #10
0
class SampleGangaObject(GangaObject):
    """Test object exposing exported methods for proxy-stripping checks."""
    _schema = Schema(
        Version(1, 0),
        {
            'a': SimpleItem(42, typelist=[int]),
            # 'b' is skipped on purpose
            'c': ComponentItem('gangafiles'),
        })
    _category = 'TestGangaObject'
    _name = 'TestGangaObject'

    _exportmethods = ['example', 'check_not_proxy']

    def example(self):
        """Exported method returning a fixed marker string."""
        return 'example_string'

    def check_not_proxy(self, obj):
        """Verify proxy-stripping on the way in and out of exported methods."""
        assert not Ganga.GPIDev.Base.Proxy.isProxy(
            obj), 'incoming argument should be proxy-stripped'
        new_obj = SampleGangaObject()
        assert not Ganga.GPIDev.Base.Proxy.isProxy(
            new_obj), 'new object should not be proxy-wrapped'
        return new_obj

    def not_proxied(self):
        """Deliberately not exported; reachable only without the proxy."""
        return 'example_string'
Example #11
0
def taskify(baseclass, name):
    """Build a task-enabled subclass of *baseclass* named *name*.

    The generated class combines the base schema with the task-specific
    schema items and registers a handler mapping for the new class.

    Raises:
        ValueError: if *baseclass* belongs to a category that cannot be
            taskified (only 'applications' and 'splitters' are supported).
    """
    smajor = baseclass._schema.version.major
    sminor = baseclass._schema.version.minor

    cat = baseclass._category

    if cat == "applications":
        schema_items = _app_schema
        taskclass = TaskApplication
    elif cat == "splitters":
        schema_items = _splitter_schema
        taskclass = TaskSplitter
    else:
        # The original silently fell through here, leaving schema_items and
        # taskclass unbound and raising a confusing NameError below.
        raise ValueError("Cannot taskify class of category '%s'" % cat)

    # list() keeps this working on Python 3, where dict views cannot be
    # concatenated with '+'.
    classdict = {
        "_schema": Schema(Version(smajor, sminor),
                          dict(list(baseclass._schema.datadict.items()) + schema_items)),
        "_category": cat,
        "_name": name,
        "__init__": __task__init__,
    }

    if '_exportmethods' in baseclass.__dict__:
        classdict['_exportmethods'] = baseclass.__dict__['_exportmethods']
    cls = classobj(name, (taskclass, baseclass), classdict)

    global handler_map
    # Use the same handlers as for the base class
    handler_map.append((getName(baseclass), name))

    return cls
Example #12
0
class ICommandSet(GangaObject):
    """
    Class used to define shell commands and options for working with credentials
    """
    _schema = Schema(
        Version(1, 0), {
            "init": SimpleItem(
                defvalue="",
                doc="Command for creating/initialising credential"),
            "info": SimpleItem(
                defvalue="",
                doc="Command for obtaining information about credential"),
            "destroy": SimpleItem(
                defvalue="", doc="Command for destroying credential"),
            "init_parameters": SimpleItem(
                defvalue={},
                doc="Dictionary of parameter-value pairs to pass to init command"),
            "destroy_parameters": SimpleItem(
                defvalue={},
                doc="Dictionary of parameter-value pairs to pass to destroy command"),
            "info_parameters": SimpleItem(
                defvalue={},
                doc="Dictionary mapping from Ganga credential properties to command-line options"),
        })

    _category = "credential_commands"
    _name = "ICommandSet"
    _hidden = 1
    _enable_config = 1

    def __init__(self):
        super(ICommandSet, self).__init__()

    def setConfigValues(self):
        """
        Update properties using values from relevant section of config file.
        """
        section = "defaults_%s" % self._name
        config = getConfig(section)
        for attribute in self._schema.datadict:
            try:
                value = config[attribute]
                try:
                    # SECURITY: eval of config-file text. Config files are
                    # trusted local input here, but never feed this from an
                    # untrusted source.
                    value = eval(value)
                except Exception:
                    # Not a Python literal/expression - keep the raw string.
                    # (Was a bare 'except:', which also swallowed
                    # SystemExit/KeyboardInterrupt.)
                    pass
                setattr(self, attribute, value)
            except ConfigError:
                # Attribute absent from the config section - keep default.
                pass
Example #13
0
class ThreadedTestGangaObject(GangaObject):
    """Hidden test object with a simple item plus a nested component item."""
    _schema = Schema(
        Version(1, 0), {
            'a': SimpleItem(42, typelist=[int]),
            'b': ComponentItem('TestGangaObject', defvalue='SimpleGangaObject'),
        })
    _hidden = True
    _category = 'TestGangaObject'
    _enable_plugin = True
Example #14
0
class ArgSplitter(ISplitter):
    """
    Split job by changing the args attribute of the application.

    This splitter only applies to the applications which have args attribute (e.g. Executable, Root).
    It is a special case of the GenericSplitter.

    This splitter allows the creation of a series of subjobs where
    the only difference between different jobs are their
    arguments. Below is an example that executes a ROOT script ~/analysis.C

    void analysis(const char* type, int events) {
      std::cout << type << "  " << events << std::endl;
    }

    with 3 different sets of arguments.

    s = ArgSplitter(args=[['AAA',1],['BBB',2],['CCC',3]])
    r = Root(version='5.10.00',script='~/analysis.C')
    j = Job(application=r, splitter=s)

    Notice how each job takes a list of arguments (in this case a list
    with a string and an integer). The splitter thus takes a list of
    lists, in this case with 3 elements so there will be 3 subjobs.

    Running the subjobs will produce the output:
    subjob 1 : AAA  1
    subjob 2 : BBB  2
    subjob 3 : CCC  3
"""
    _name = "ArgSplitter"
    _schema = Schema(
        Version(1, 0), {
            'args':
            SimpleItem(defvalue=[],
                       typelist=[
                           'list',
                           'Ganga.GPIDev.Lib.GangaList.GangaList.GangaList'
                       ],
                       sequence=1,
                       doc='A list of lists of arguments to pass to script')
        })

    def split(self, job):
        """Return one subjob per entry in ``self.args``.

        Each subjob carries a deep copy of the master application with its
        ``args`` attribute replaced by the corresponding entry.
        """
        subjobs = []

        for arg in self.args:
            j = self.createSubjob(job, ['application'])
            # Give each subjob its own application object with this arg set.
            app = copy.deepcopy(job.application)
            app.args = arg
            j.application = app
            logger.debug('Arguments for split job is: ' + str(arg))
            subjobs.append(stripProxy(j))

        return subjobs
Example #15
0
class EmptyGangaObject(GangaObject):
    """Empty Ganga Object. Is used to construct incomplete jobs"""
    _category = "internal"
    _name = "EmptyGangaObject"
    _hidden = 1
    _schema = Schema(Version(0, 0), {})

    def __init__(self):
        super(EmptyGangaObject, self).__init__()
Example #16
0
class IncompleteObject(GangaObject):
    """ This class represents an object that could not be loaded on startup"""

    _schema = Schema(Version(0, 0), {})
    _name = "IncompleteObject"
    _category = "internal"
    _hidden = 1

    _exportmethods = ['reload', 'remove', '__repr__']

    def __init__(self, registry, this_id):
        """
        Construct the placeholder that goes into the objects dict when a repo fails to load an object due to some error.
        Args:
            registry (Registry): This is the registry the object belongs to
            this_id (int): This is the registry/repo id of the object in the objects dict
        """
        super(IncompleteObject, self).__init__()
        self.registry = registry
        self.id = this_id

    def reload(self):
        """
        Trigger a re-load of the object from disk; useful if the object was locked but accessible by Ganga.
        TODO: work out if this is still called anywhere
        """
        # Take both registry locks so no flush or concurrent read races the load.
        with self.registry._flush_lock:
            with self.registry._read_lock:
                self.registry._load(self)
                logger.debug("Successfully reloaded '%s' object #%i!" %
                             (self.registry.name, self.id))

    def remove(self):
        """
        Delete the object itself from within the given Repository, but not from the registry.
        TODO: work out if this is safe and still called
        """
        with self.registry._flush_lock:
            with self.registry._read_lock:
                # An empty lock result means another session holds this object.
                if len(self.registry.repository.lock([self.id])) == 0:
                    errstr = "Could not lock '%s' object #%i!" % (
                        self.registry.name, self.id)
                    try:
                        # Best effort: name the session holding the lock.
                        errstr += " Object is locked by session '%s' " % self.registry.repository.get_lock_session(
                            self.id)
                    except Exception as err:
                        logger.debug("Remove Lock error: %s" % err)
                    raise RegistryLockError(errstr)
                self.registry.repository.delete([self.id])

    def __repr__(self):
        """
        Return a repr marking this object as inaccessible and suggesting remedies.
        """
        return "Incomplete object in '%s', ID %i. Try reload() or remove()." % (
            self.registry.name, self.id)
Example #17
0
class Im3ShapeSplitter(ISplitter):
    """
    This splitter splits jobs using the Im3ShapeApp application using the size parameter.

    If a splitter is configured with size = 5, split_by_file = True, then it will create 5 subjobs per file in the master_job.inputdata
    If a splitter is configured with size = 5, split_by_file = False, then it will create 5 subjobs total and configure all subjobs to use all given data.

    In the future there may be support for splitting based upon regex and namePatterns in the inputdata to allow a certain subset of data to be put in each subjob.
    """
    _name = "Im3ShapeSplitter"
    _schema = Schema(Version(1, 0), {
        'size': SimpleItem(
            defvalue=5, doc='Size of the tiles which are to be split.'),
        'split_by_file': SimpleItem(
            defvalue=True,
            doc='Should we auto-split into subjobs here on a per-file basis?')
    })

    def split(self, job):
        """
        Actually perform the splitting of the given master job. The generated subjobs of the splitting are returned
        Args:
            job (Job): This is the master job object which is to be split and the subjobs of which are returned
        """

        assert isinstance(job.application, Im3ShapeApp)

        def _rank_app(rank):
            # Each subjob carries its own application marked with rank/size.
            clone = copy.deepcopy(job.application)
            clone.rank = rank
            clone.size = self.size
            return clone

        subjobs = []

        if self.split_by_file:
            # self.size subjobs per input file, each seeing only that file.
            for this_file in job.inputdata:
                for rank in range(self.size):
                    sj = self.createSubjob(job, ['application'])
                    sj.application = _rank_app(rank)
                    sj.inputdata = GangaDataset(files=[stripProxy(this_file)])
                    subjobs.append(sj)
        else:
            # self.size subjobs total, all sharing the master inputdata.
            for rank in range(self.size):
                sj = self.createSubjob(job, ['application'])
                sj.application = _rank_app(rank)
                sj.inputdata = job.inputdata
                logger.debug('Rank for split job is: ' + str(rank))
                subjobs.append(sj)

        return subjobs
Example #18
0
 def test_create(self):
     """
     Create a complex schema and make sure all the items are added
     """
     schema_dict = {
         'application': ComponentItem(category='applications'),
         'backend': ComponentItem(category='backends'),
         'name': SimpleItem('', comparable=0),
         'workdir': SimpleItem(defvalue=None, type='string', transient=1, protected=1, comparable=0),
         'status': SimpleItem(defvalue='new', protected=1, comparable=0),
         'id': SimpleItem(defvalue=None, type='string', protected=1, comparable=0),
         'inputbox': FileItem(defvalue=[], sequence=1),
         'outputbox': FileItem(defvalue=[], sequence=1),
         'overriden_copyable': SimpleItem(defvalue=None, protected=1, copyable=1),
         'plain_copyable': SimpleItem(defvalue=None, copyable=0)
     }
     schema = Schema(Version(1, 0), schema_dict)
     self.assertEqual(schema.allItems(), schema_dict.items())
     combined = schema.componentItems() + schema.simpleItems()
     self.assertEqual(sorted(combined), sorted(schema_dict.items()))
Example #19
0
class GangaDatasetSplitter(ISplitter):

    """ Split job based on files given in GangaDataset inputdata field """
    _name = "GangaDatasetSplitter"
    _schema = Schema(Version(1, 0), {
        'files_per_subjob': SimpleItem(defvalue=5, doc='the number of files per subjob', typelist=[int]),

        'maxFiles': SimpleItem(defvalue=-1,
                               doc='Maximum number of files to use in a masterjob (None or -1 = all files)',
                               typelist=[int, None]),
    })

    def split(self, job):
        """Return subjobs covering chunks of the expanded input file list.

        Raises:
            ApplicationConfigurationError: when no GangaDataset is supplied
                or the dataset expands to zero files.
        """
        if not job.inputdata or not isType(job.inputdata, GangaDataset):
            raise ApplicationConfigurationError(
                "No GangaDataset given for GangaDatasetSplitter")

        # Expand wildcards so splitting works on concrete files.
        full_list = []
        for f in job.inputdata.files:
            if f.containsWildcards():
                full_list.extend(f.getSubFiles(process_wildcards=True))
            else:
                full_list.append(f)

        if not full_list:
            raise ApplicationConfigurationError(
                "GangaDatasetSplitter couldn't find any files to split over")

        masterType = type(job.inputdata)

        # Honour maxFiles. The schema allows None as well as -1 for "all
        # files"; the original crashed with min(None, int) on None.
        filesToRun = len(full_list)
        if self.maxFiles is not None and self.maxFiles != -1:
            filesToRun = min(self.maxFiles, filesToRun)

        # Chunk the expanded list into subjobs of files_per_subjob files.
        subjobs = []
        fid = 0
        while fid < filesToRun:
            j = self.createSubjob(job)
            j.inputdata = masterType()
            j.inputdata.treat_as_inputfiles = job.inputdata.treat_as_inputfiles
            for sf in full_list[fid:fid + self.files_per_subjob]:
                j.inputdata.files.append(sf)

            fid += self.files_per_subjob
            subjobs.append(j)

        return subjobs
Example #20
0
class CoreTask(ITask):
    """General non-experimentally specific Task"""
    # A plain copy of the parent datadict: the old items()+items() form
    # breaks on Python 3 (dict views don't support '+') and added nothing.
    _schema = Schema(Version(1, 0), dict(ITask._schema.datadict))

    _category = 'tasks'
    _name = 'CoreTask'
    _exportmethods = ITask._exportmethods + []

    _tasktype = "ITask"

    default_registry = "tasks"
Example #21
0
class Dataset(GangaObject):
    """Base dataset class; the default instance represents 'no data'."""
    _schema = Schema(Version(1, 0), {})
    _category = 'datasets'
    _name = "EmptyDataset"

    def __init__(self):
        super(Dataset, self).__init__()

    def isEmpty(self):
        """Return True if this instance is the default base-class dataset.

        Subclasses may override this, but the default name comparison is
        usually sufficient.
        """
        return self._name == Dataset._name
Example #22
0
class TestGangaObject(GangaObject):
    """Minimal test object with one exported (proxied) method."""
    _schema = Schema(Version(1, 0), {'a': SimpleItem(42, typelist=['int'])})
    _name = 'TestGangaObject'
    _category = 'TestGangaObject'

    _exportmethods = ['example']

    def example(self):
        """Exported: visible through the GPI proxy."""
        return 'example_string'

    def not_proxied(self):
        """Not exported: should not appear on the proxy."""
        return 'example_string'
Example #23
0
class TaskLocalCopy(Dataset):
    """Dummy dataset to force Tasks to copy the output from a job to local storage somewhere"""

    _schema = Schema(Version(1, 0), {
        'local_location': SimpleItem(
            defvalue="", doc="Local location to copy files to"),
        'include_file_mask': SimpleItem(
            defvalue=[], typelist=['str'], sequence=1,
            doc='List of Regular expressions of which files to include in copy'),
        'exclude_file_mask': SimpleItem(
            defvalue=[], typelist=['str'], sequence=1,
            doc='List of Regular expressions of which files to exclude from copy'),
        'files': SimpleItem(
            defvalue=[], typelist=['str'], sequence=1,
            doc='List of successfully downloaded files'),
    })

    _category = 'datasets'
    _name = 'TaskLocalCopy'
    _exportmethods = ["isValid", "isDownloaded"]

    def __init__(self):
        super(TaskLocalCopy, self).__init__()

    def isValid(self, fname):
        """Check if this file should be downloaded"""
        # Every include mask must match, and no exclude mask may match.
        if any(not re.search(in_re, fname) for in_re in self.include_file_mask):
            return False
        return not any(re.search(out_re, fname)
                       for out_re in self.exclude_file_mask)

    def isDownloaded(self, fname):
        """Check if this file is present at the local_location"""
        target = os.path.join(self.local_location, fname)
        return os.path.exists(target)
Example #24
0
class CRABSplitter(ISplitter):
    """Splitter object for CRAB jobs."""
    schemadic = {}
    schemadic['maxevents'] = SimpleItem(defvalue=None,
                                        typelist=['type(None)', 'int'],
                                        doc='Maximum number of events/task')
    schemadic['inputfiles'] = SimpleItem(defvalue=None,
                                         typelist=['type(None)', 'str'],
                                         doc='Number of input files')
    schemadic['skipevents'] = SimpleItem(defvalue=None,
                                         typelist=['type(None)', 'int'],
                                         doc='Offset for the events')
    _name = 'CRABSplitter'
    _schema = Schema(Version(1, 0), schemadic)

    def parseArguments(self, path):
        """Gets some job arguments from the FJR.

        Returns a list of [MaxEvents, InputFiles, SkipEvents] attribute
        triples, one per <Job> element in the XML file at *path*.
        """
        splittingData = []
        for job in parse(path).getElementsByTagName("Job"):
            splittingData.append([
                job.getAttribute("MaxEvents"),
                job.getAttribute("InputFiles"),
                job.getAttribute("SkipEvents")
            ])
        return splittingData

    def split(self, job):
        """Main splitter for the job.

        Raises:
            SplitterError: when the arguments XML cannot be read.
        """
        try:
            arguments_path = os.path.join(job.inputdata.ui_working_dir,
                                          'share/arguments.xml')
            splittingData = self.parseArguments(arguments_path)
        except IOError as e:
            # 'except IOError, e' was Python-2-only syntax; 'as' matches
            # the rest of the codebase and is valid on Python 2.6+/3.
            raise SplitterError(e)

        subjobs = []
        for maxevents, inputfiles, skipevents in splittingData:
            j = self.createSubjob(job)
            j.master = job
            j.application = job.application
            j.inputdata = job.inputdata
            j.backend = job.backend

            # Record this slice's parameters on a per-subjob splitter.
            splitter = CRABSplitter()
            splitter.maxevents = maxevents
            splitter.inputfiles = inputfiles
            splitter.skipevents = skipevents
            j.splitter = splitter
            subjobs.append(j)
        return subjobs
Example #25
0
class BoxMetadataObject(GangaObject):
    """Internal object to store names"""
    _schema = Schema(Version(1, 0), {
        "name": SimpleItem(defvalue="", copyable=1,
                           doc='the name of this object', typelist=[str])
    })
    _category = "internal"
    _name = "BoxMetadataObject"
    _hidden = 1
    _enable_plugin = True
Example #26
0
 def setUp(self):
     """Create the schema instance the test methods operate on."""
     # Keyword-form dict preserves the same insertion order and keys.
     self.dd = dict(
         application=ComponentItem(category='applications'),
         backend=ComponentItem(category='backends'),
         name=SimpleItem('', comparable=0),
         workdir=SimpleItem(defvalue=None, type='string', transient=1, protected=1, comparable=0),
         status=SimpleItem(defvalue='new', protected=1, comparable=0),
         id=SimpleItem(defvalue=None, typelist=[str], protected=1, comparable=0),
         inputbox=FileItem(defvalue=[], sequence=1),
         outputbox=FileItem(defvalue=[], sequence=1),
         overriden_copyable=SimpleItem(defvalue=None, protected=1, copyable=1),
         plain_copyable=SimpleItem(defvalue=None, copyable=0),
     )
     self.s = Schema(Version(1, 0), self.dd)
Example #27
0
class CRABDataset(Dataset):
    """Dataset definition for CRAB jobsets."""
    # Merge the schema fragments contributed by the CRAB and TASK configs
    # (later updates override earlier ones on key collisions).
    schemadic = dict()
    schemadic.update(CRAB().schemadic)
    schemadic.update(TASK().schemadic)
    _schema = Schema(Version(1, 0), schemadic)
    _category = 'datasets'
    _name = 'CRABDataset'

    def __init__(self):
        super(CRABDataset, self).__init__()
Example #28
0
class CRABDataset(Dataset):
    """Dataset definition for CRAB jobsets."""
    # Merge schema fragments from each configuration section; later
    # updates override earlier ones on key collisions.
    schemadic = dict()
    schemadic.update(CMSSW().schemadic)
    schemadic.update(CRAB().schemadic)
    schemadic.update(GRID().schemadic)
    schemadic.update(USER().schemadic)
    schemadic['target_site'] = SimpleItem(
        defvalue=None,
        typelist=['type(None)', 'str'],
        doc='Target site name for the job.')
    _schema = Schema(Version(1, 0), schemadic)
    _category = 'datasets'
    _name = 'CRABDataset'

    def __init__(self):
        super(CRABDataset, self).__init__()
Example #29
0
class OptionsFileSplitter(ISplitter):
    '''Split a job based on a list of option file fragments.

    This Splitter takes as argument a list of option file statements and will
    generate a job for each item in this list. The value of the individual list
    item will be appended to the master options file. A use case of this
    splitter would be to change a parameter in an algorithm (e.g. a cut) and to
    recreate a set of jobs with different cuts.
    '''
    _name = "OptionsFileSplitter"
    docstr = "List of option-file strings, each list item creates a new subjob"
    _schema = Schema(Version(1, 0),
                     {'optsArray': SimpleItem(defvalue=[], doc=docstr)})

    def _create_subjob(self, job, inputdata):
        """Return a fresh subjob copied from *job*, carrying *inputdata*.

        The copy is detached from the master's splitter/merger and starts
        with empty sandboxes so only per-subjob data is attached.
        """
        j = Job()
        j.copyFrom(job)
        j.splitter = None
        j.merger = None
        j.inputsandbox = []  # master added automatically
        j.inputfiles = []
        j.inputdata = inputdata

        return j

    def split(self, job):
        """Create one subjob per entry in ``self.optsArray``.

        If *job* has no inputdata, a pickled inputdata object is loaded from
        the prepared application's shared area (when present) and attached to
        every subjob. Returns the list of subjobs.
        """
        subjobs = []

        inputdata = job.inputdata
        if not job.inputdata:
            share_path = os.path.join(
                expandfilename(getConfig('Configuration')['gangadir']),
                'shared',
                getConfig('Configuration')['user'],
                job.application.is_prepared.name, 'inputdata',
                'options_data.pkl')

            if os.path.exists(share_path):
                # Fix: open read-only ('rb', not 'r+b' — write access was never
                # used) and close the handle even if unpickling raises.
                # NOTE(review): pickle.load on a shared-area file assumes the
                # file is trusted; never point this at untrusted data.
                with open(share_path, 'rb') as f:
                    inputdata = pickle.load(f)

        for i in self.optsArray:
            j = self._create_subjob(job, inputdata)
            j._splitter_data = i
            subjobs.append(j)
        return subjobs
Example #30
0
class OutputData(GangaObject):
    '''Class for handling outputdata for LHCb jobs.

    Example Usage:
    od = OutputData(["file.1","file.2"])
    od[0] # "file.1"
    [...etc...]
    '''
    schema = {}
    schema['files'] = SimpleItem(defvalue=[], typelist=['str'], sequence=1)
    schema['location'] = SimpleItem(defvalue='', typelist=['str'])
    _schema = Schema(Version(1, 1), schema)
    _category = 'datasets'
    _name = "OutputData"
    _exportmethods = ['__len__', '__getitem__']

    def __init__(self, files=None):
        """Create an OutputData holding *files* (a list of file names)."""
        # None sentinel avoids the shared mutable-default-argument pitfall.
        if files is None:
            files = []
        super(OutputData, self).__init__()
        self.files = files

    def __construct__(self, args):
        """Support construction from a single list/tuple of file names."""
        # isinstance (instead of exact type comparison) also accepts
        # list/tuple subclasses — a backward-compatible generalisation.
        if (len(args) != 1) or not isinstance(args[0], (list, tuple)):
            super(OutputData, self).__construct__(args)
        else:
            self.files = args[0]

    def __len__(self):
        """The number of files in the dataset."""
        return len(self.files) if self.files else 0

    def __nonzero__(self):
        """This is always True, as with an object."""
        return True

    # Fix: under Python 3 __nonzero__ is ignored and bool() falls back to
    # __len__, making an empty dataset falsy against the documented contract.
    __bool__ = __nonzero__

    def __getitem__(self, i):
        '''Provides scripting (e.g. od[2] returns the 3rd file name).'''
        # Slices return a new (proxied) OutputData; integer indices return
        # the bare file name. isinstance replaces type(i) == type(slice(0)).
        if isinstance(i, slice):
            return GPIProxyObjectFactory(OutputData(files=self.files[i]))
        else:
            return self.files[i]
Example #31
0
class IChecker(IPostProcessor):
    """
    Abstract class which all checkers inherit from.
    """
    _schema = Schema(
        Version(1, 0), {
            'checkSubjobs': SimpleItem(defvalue=True, doc='Run on subjobs'),
            'checkMaster': SimpleItem(defvalue=True, doc='Run on master')
        })
    _category = 'postprocessor'
    _name = 'IChecker'
    _hidden = 1
    order = 2

    def execute(self, job, newstatus):
        """
        Execute the check method, if check fails pass the check and issue an ERROR message. Message is also added to the debug folder.
        """
        # Anything other than a completed job passes trivially.
        if newstatus != 'completed':
            return True

        # Master jobs are checked when checkMaster is set; subjobs when
        # checkSubjobs is set.
        is_master = job.master is None
        applicable = (is_master and self.checkMaster) or \
            (not is_master and self.checkSubjobs)
        if not applicable:
            return  # implicit None, matching the original fall-through

        try:
            return self.check(job)
        except Exception as err:
            # A crashing checker must not fail the job: record the error in
            # the debug workspace, log it, and pass the check.
            error_log = os.path.join(job.getDebugWorkspace().getPath(),
                                     'checker_errors.txt')
            with open(error_log, 'a') as debug_file:
                debug_file.write(
                    '\n Checker has failed with the following error: \n'
                )
                debug_file.write(str(err))
            logger.error("%s" % err)
            return True

    def check(self, job):
        """
        Method to check the output of jobs.
        Must be overridden by concrete checkers.
        """
        raise NotImplementedError
Example #32
0
class GridftpFileIndex(GridFileIndex):
    """
    Data object containing Gridftp file index information.

        - id: gsiftp URI
        - name: basename of the file
        - md5sum: md5 checksum
        - attributes['fpath']: path of the file on local machine

    @author: Hurng-Chun Lee
    @contact: [email protected]
    """

    # Schema entries come from a datadict defined elsewhere in the module.
    _schema = Schema(Version(1, 0), gridftp_file_index_schema_datadict)
    _category = 'GridFileIndex'
    _name = 'GridftpFileIndex'

    def __init__(self):
        # Delegate initialisation to the GridFileIndex base class.
        super(GridftpFileIndex, self).__init__()