Esempio n. 1
0
    def __init__(self,
                 expFileName,
                 sysPath,
                 messengerHost,
                 messengerPort,
                 messengerUser,
                 messengerPassword,
                 sync,
                 aggregator,
                 learnerFactory,
                 dataSourceFactory,
                 stoppingCriterion,
                 initHandler=None):
        """Store the configuration this object is built from.

        Each argument is kept unchanged on the attribute of the same name.
        In addition, ``_uniqueId`` is set to the current process id (as a
        string) and ``dataScheduler`` to a new ``IntervalDataScheduler``.

        Args:
            expFileName: stored as ``self.expFileName``.
            sysPath: stored as ``self.sysPath``.
            messengerHost: stored as ``self.messengerHost``.
            messengerPort: stored as ``self.messengerPort``.
            messengerUser: stored as ``self.messengerUser``.
            messengerPassword: stored as ``self.messengerPassword``.
            sync: stored as ``self.sync``.
            aggregator: stored as ``self.aggregator``.
            learnerFactory: stored as ``self.learnerFactory``.
            dataSourceFactory: stored as ``self.dataSourceFactory``.
            stoppingCriterion: stored as ``self.stoppingCriterion``.
            initHandler: stored as ``self.initHandler``. When omitted (or
                ``None``), a new ``InitializationHandler`` is created for
                this instance.
        """
        # Build the default handler per instance. A call expression used as
        # the default value would be evaluated only once, when the function
        # is defined, and the resulting object shared by every instance.
        if initHandler is None:
            initHandler = InitializationHandler()

        self._uniqueId = str(os.getpid())
        self.expFileName = expFileName
        self.sysPath = sysPath

        self.messengerHost = messengerHost
        self.messengerPort = messengerPort
        self.messengerUser = messengerUser
        self.messengerPassword = messengerPassword

        self.sync = sync
        self.aggregator = aggregator
        self.learnerFactory = learnerFactory
        self.dataSourceFactory = dataSourceFactory
        self.dataScheduler = IntervalDataScheduler()

        self.initHandler = initHandler
        self.stoppingCriterion = stoppingCriterion
Esempio n. 2
0
    def __init__(self,
                 executionMode,
                 messengerHost,
                 messengerPort,
                 numberOfNodes,
                 sync,
                 aggregator,
                 learnerFactory,
                 dataSourceFactory,
                 stoppingCriterion,
                 initHandler=None,
                 dataScheduler=IntervalDataScheduler,
                 minStartNodes=0,
                 minStopNodes=0,
                 sleepTime=5):
        """Store the experiment configuration and work out the device list.

        When ``executionMode`` is ``'cpu'``, ``devices`` and ``modelsPer``
        are both ``None``. For any other value, ``devices`` becomes a list of
        ``'cuda:<id>'`` strings and ``modelsPer`` is
        ``ceil(numberOfNodes / len(devices))``; that result is also printed.

        Args:
            executionMode: ``'cpu'`` skips device discovery; any other value
                triggers it.
            messengerHost: stored as ``self.messengerHost``.
            messengerPort: stored as ``self.messengerPort``.
            numberOfNodes: stored as ``self.numberOfNodes`` and used to
                compute ``modelsPer``.
            sync: stored as ``self.sync``.
            aggregator: stored as ``self.aggregator``.
            learnerFactory: stored as ``self.learnerFactory``.
            dataSourceFactory: stored as ``self.dataSourceFactory``.
            stoppingCriterion: stored as ``self.stoppingCriterion``.
            initHandler: stored as ``self.initHandler``. When omitted (or
                ``None``), a new ``InitializationHandler`` is created for
                this instance.
            dataScheduler: stored as ``self.dataScheduler``; the default is
                the ``IntervalDataScheduler`` class itself, not an instance.
            minStartNodes: stored as ``self.minStartNodes``.
            minStopNodes: stored as ``self.minStopNodes``.
            sleepTime: stored as ``self.sleepTime``.

        Raises:
            ValueError: if a non-``'cpu'`` mode is requested but no device id
                could be determined.
        """
        # Build the default handler per instance. A call expression used as
        # the default value would be evaluated only once, when the function
        # is defined, and the resulting object shared by every instance.
        if initHandler is None:
            initHandler = InitializationHandler()

        self.executionMode = executionMode
        if executionMode == 'cpu':
            self.devices = None
            self.modelsPer = None
        else:
            visibleDevices = os.environ.get('CUDA_VISIBLE_DEVICES')
            if visibleDevices is None:
                # The number of occurrences of "UUID" in the `nvidia-smi -L`
                # listing is taken as the number of GPUs.
                listing = subprocess.check_output(["nvidia-smi", "-L"])
                gpuIds = range(listing.count(b'UUID'))
            else:
                # Strip whitespace and drop empty entries so that values
                # such as "0, 1" or "" do not produce malformed device names.
                gpuIds = [entry.strip()
                          for entry in visibleDevices.split(',')
                          if entry.strip()]
            # NOTE(review): the ids are used verbatim as device indices;
            # confirm this matches how the consumer of self.devices
            # interprets them when CUDA_VISIBLE_DEVICES is set.
            self.devices = ['cuda:' + str(gpuId) for gpuId in gpuIds]
            if not self.devices:
                # Fail with a clear message instead of a ZeroDivisionError
                # in the computation below.
                raise ValueError(
                    "executionMode %r requires at least one GPU, but none "
                    "was found" % (executionMode,))
            self.modelsPer = math.ceil(numberOfNodes * 1.0 / len(self.devices))
            print(self.modelsPer, "models per gpu on", ','.join(self.devices))

        self.messengerHost = messengerHost
        self.messengerPort = messengerPort
        self.numberOfNodes = numberOfNodes
        self.sync = sync
        self.aggregator = aggregator
        self.learnerFactory = learnerFactory
        self.dataSourceFactory = dataSourceFactory
        self.stoppingCriterion = stoppingCriterion
        self.initHandler = initHandler
        self.dataScheduler = dataScheduler
        self._uniqueId = str(os.getpid())
        self.sleepTime = sleepTime
        self.minStartNodes = minStartNodes
        self.minStopNodes = minStopNodes
Esempio n. 3
0
    # Assemble the components of the experiment and launch it.
    # NOTE(review): numberOfNodes, updateRule, learningRate, learningParams,
    # lossFunction, batchSize, syncPeriod, sync, executionMode, messengerHost
    # and messengerPort are defined outside this excerpt.
    aggregator = Average()
    # presumably ends the run after 2800 examples; verify against
    # MaxAmountExamples
    stoppingCriterion = MaxAmountExamples(2800)
    # Data source: a text file decoded with MNISTDecoder. The path is
    # relative, so it presumably depends on the working directory the script
    # is started from -- TODO confirm.
    dsFactory = FileDataSourceFactory(
        filename="../../../../data/textualMNIST/mnist_train.txt",
        decoder=MNISTDecoder(),
        numberOfNodes=numberOfNodes,
        indices='roundRobin',
        shuffle=False,
        cache=False)
    # Learner factory: a DropoutNet network plus the training settings
    # defined outside this excerpt.
    learnerFactory = PytorchLearnerFactory(network=DropoutNet(),
                                           updateRule=updateRule,
                                           learningRate=learningRate,
                                           learningParams=learningParams,
                                           lossFunction=lossFunction,
                                           batchSize=batchSize,
                                           syncPeriod=syncPeriod)
    # An explicit handler is created here and passed to Experiment below.
    initHandler = InitializationHandler()

    # Wire everything into the Experiment; dataScheduler, minStartNodes,
    # minStopNodes and sleepTime are not passed here.
    exp = Experiment(executionMode=executionMode,
                     messengerHost=messengerHost,
                     messengerPort=messengerPort,
                     numberOfNodes=numberOfNodes,
                     sync=sync,
                     aggregator=aggregator,
                     learnerFactory=learnerFactory,
                     dataSourceFactory=dsFactory,
                     stoppingCriterion=stoppingCriterion,
                     initHandler=initHandler)
    # The string argument is presumably the experiment's name -- TODO confirm
    # against Experiment.run.
    exp.run("MNISTtorchCNNwithDropOut")