Ejemplo n.º 1
0
def discover_blocks(options):
    """Discover dataset blocks and dump them to a 'dbs' directory.

    The first positional argument is either an existing work directory or a
    config file from which the work path is derived.  Blocks come from a
    ListProvider (when an input file is given) or a DBSInfoProvider, and are
    written to <tempdir>/dbs.dat.  Exits the process when only discovery was
    requested; otherwise returns the block list.
    """
    # Resolve the work directory: a directory argument is used directly,
    # anything else is treated as a config file providing the work path.
    first_arg = options.args[0]
    if os.path.isdir(first_arg):
        workDir = os.path.abspath(os.path.normpath(first_arg))
    else:
        workDir = getConfig(configFile=first_arg).getWorkPath()

    # Default dump directory lives inside the work directory
    if not options.opts.tempdir:
        options.opts.tempdir = os.path.join(workDir, 'dbs')
    if not os.path.exists(options.opts.tempdir):
        os.mkdir(options.opts.tempdir)

    # Pick the dataset information provider
    if options.opts.input_file:
        provider = DataProvider.createInstance(
            'ListProvider', getConfig(), options.opts.input_file, None)
    else:
        dbs_config = getConfig(configDict={'dataset': options.config_dict})
        provider = DataProvider.createInstance(
            'DBSInfoProvider', dbs_config, first_arg, None)

    blocks = provider.getBlocks(show_stats=False)
    DataProvider.saveToFile(os.path.join(options.opts.tempdir, 'dbs.dat'),
                            blocks)
    if options.opts.discovery:
        sys.exit(os.EX_OK)
    return blocks
Ejemplo n.º 2
0
def save_dataset(opts, provider):
	"""Write the provider's dataset blocks to *opts.save*.

	When *opts.ordered* is set, blocks are sorted by (dataset, block name)
	and the file list inside each block by URL, giving deterministic output.
	"""
	print('')
	blocks = provider.getBlocks()
	if opts.ordered:
		block_key = itemgetter(DataProvider.Dataset, DataProvider.BlockName)
		url_key = itemgetter(DataProvider.URL)
		sort_inplace(blocks, key = block_key)
		for block in blocks:
			sort_inplace(block[DataProvider.FileList], key = url_key)
	DataProvider.saveToFile(opts.save, blocks)
	print('Dataset information saved to ./%s' % opts.save)
Ejemplo n.º 3
0
    def __init__(self,
                 dataDir,
                 srcName,
                 dataProvider,
                 dataSplitter,
                 dataProc,
                 repository,
                 keepOld=True):
        """Set up a dataset parameter source and restore/build its partition state.

        Registers itself in *repository* under 'dataset:<srcName>'.  Without a
        *dataProvider* (debug mode, used by scripts) resync is disabled and only
        the job count is taken from the splitter.  Otherwise an interrupted
        resync is recovered (or rejected as broken), and the partition map is
        either imported from disk or freshly created from the provider's blocks.
        """
        LimitedResyncParameterSource.__init__(self)
        (self._dn, self._name, self._data_provider, self._data_splitter, self._part_proc, self._keepOld) = \
         (dataDir, srcName, dataProvider, dataSplitter, dataProc, keepOld)
        repository['dataset:%s' % srcName] = self
        # interval=-1: resync is triggered manually, not on a timer — TODO confirm semantics
        self.resyncSetup(interval=-1)

        if not dataProvider:  # debug mode - used by scripts - disables resync
            self._maxN = self._data_splitter.getMaxJobs()
            return

        # look for aborted resyncs - and try to restore old state if possible
        # Both .resync files present => the resync completed writing; promote
        # them to the live names.
        if self._existsDataPath('cache.dat.resync') and self._existsDataPath(
                'map.tar.resync'):
            utils.renameFile(self._getDataPath('cache.dat.resync'),
                             self._getDataPath('cache.dat'))
            utils.renameFile(self._getDataPath('map.tar.resync'),
                             self._getDataPath('map.tar'))
        # Only one of the pair present => the resync was interrupted mid-write;
        # refuse to guess which half is valid.
        elif self._existsDataPath('cache.dat.resync') or self._existsDataPath(
                'map.tar.resync'):
            raise DatasetError('Found broken resync state')

        if self._existsDataPath('cache.dat') and self._existsDataPath(
                'map.tar'):
            # Previous state exists: reuse the stored partition map.
            self._data_splitter.importPartitions(self._getDataPath('map.tar'))
        else:
            # First run: cache the provider's blocks, then derive partitions.
            # NOTE(review): getBlocks is called twice — presumably cached by the
            # provider; verify this does not query the data source twice.
            DataProvider.saveToFile(
                self._getDataPath('cache.dat'),
                self._data_provider.getBlocks(show_stats=False))
            self._data_splitter.splitDataset(
                self._getDataPath('map.tar'),
                self._data_provider.getBlocks(show_stats=False))

        self._maxN = self._data_splitter.getMaxJobs()
Ejemplo n.º 4
0
    def __init__(self,
                 dataDir,
                 srcName,
                 dataProvider,
                 dataSplitter,
                 dataProc,
                 keepOld=True):
        """Set up a dataset parameter source.

        Without a *dataProvider* (debug mode, used by scripts) resync is
        disabled.  Otherwise, if both the block cache and the partition map
        already exist on disk the map is imported; if either is missing the
        provider's blocks are cached and a fresh partition map is created.
        """
        ParameterSource.__init__(self)
        (self._dataDir, self._srcName, self._dataProvider, self._dataSplitter, self._part_proc) = \
         (dataDir, srcName, dataProvider, dataSplitter, dataProc)

        if not dataProvider:
            pass  # debug mode - used by scripts - disables resync
        # BUG FIX: the original wrote os.path.exists(pathA and pathB) —
        # `pathA and pathB` evaluates to the second (non-empty) path string,
        # so only map.tar was ever checked.  Each path must be tested
        # separately, as the cache.dat/map.tar pair is only usable together.
        elif os.path.exists(self.getDataPath('cache.dat')) and os.path.exists(
                self.getDataPath('map.tar')):
            self._dataSplitter.importPartitions(self.getDataPath('map.tar'))
        else:
            DataProvider.saveToFile(self.getDataPath('cache.dat'),
                                    self._dataProvider.getBlocks(silent=False))
            self._dataSplitter.splitDataset(self.getDataPath('map.tar'),
                                            self._dataProvider.getBlocks())

        self._maxN = self._dataSplitter.getMaxJobs()
        self._keepOld = keepOld