コード例 #1
0
def test_temp_file_with_final_name_creates_a_tmp_file_and_keeps_it():
    """When temp_file() gets no final name, the temporary file must survive the block."""
    # NOTE(review): the name says "with_final_name" but no final name is passed;
    # looks like a copy-paste misnomer — confirm against the sibling tests.
    with dumper.temp_file('/tmp') as (_, relative_name):
        full_path = os.path.join('/tmp', relative_name)
        assert os.path.exists(full_path)

    # No final name was given, so the file is kept after the context exits.
    assert os.path.exists(full_path)
    os.unlink(full_path)
コード例 #2
0
def consistency(rse, delta, configuration, cache_dir, results_dir):
    """
    Run a manual consistency check for one RSE and write the results to disk.

    Downloads the SRM dump for `rse`, plus the Rucio replica dumps taken
    `delta` before and after the dump date, feeds them to Consistency.dump
    and writes one CSV line per result into `results_dir`.

    Returns the path of the results file, or None when a previous run
    already produced results for this dump date.
    """
    logger = logging.getLogger('auditor-worker')
    dump_file, dump_date = srmdumps.download_rse_dump(rse, configuration, destdir=cache_dir)
    datestamp = dump_date.strftime('%Y%m%d')  # pylint: disable=no-member
    results_path = os.path.join(results_dir, '{0}_{1}'.format(rse, datestamp))

    # A previous run may have left either a plain or a bz2-compressed file.
    if os.path.exists(results_path) or os.path.exists(results_path + '.bz2'):
        logger.warning('Consistency check for "%s" (dump dated %s) already done, skipping check', rse, datestamp)
        return None

    replicas_before = ReplicaFromHDFS.download(rse, dump_date - delta, cache_dir=cache_dir)
    replicas_after = ReplicaFromHDFS.download(rse, dump_date + delta, cache_dir=cache_dir)
    results = Consistency.dump(
        'consistency-manual',
        rse,
        dump_file,
        replicas_before,
        replicas_after,
        date=dump_date,
        cache_dir=cache_dir,
    )
    mkdir(results_dir)
    # Write through a temp file so the results file appears atomically.
    with temp_file(results_dir, results_path) as (output, _):
        for result in results:
            output.write('{0}\n'.format(result.csv()))

    return results_path
コード例 #3
0
def test_temp_file_cleanup_on_exception():
    """If the body of the temp_file() block raises, the temporary file is removed."""
    try:
        with dumper.temp_file('/tmp') as (_, tmp_path):
            tmp_path = os.path.join('/tmp', tmp_path)
            raise Exception
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt still
    # propagate; the deliberately raised Exception above is caught either way.
    except Exception:
        pass
    finally:
        assert not os.path.exists(tmp_path)
コード例 #4
0
def test_temp_file_with_final_name_creates_a_tmp_file_and_then_removes_it():
    """On a clean exit temp_file() renames the scratch file to the final name."""
    final_name = tempfile.mktemp()
    with dumper.temp_file('/tmp', final_name) as (_, scratch_name):
        scratch_path = os.path.join('/tmp', scratch_name)
        # While the block runs, only the scratch file exists.
        assert os.path.exists(scratch_path)
        assert not os.path.exists(final_name)

    # Afterwards the scratch file has been moved into place under final_name.
    assert not os.path.exists(scratch_path)
    assert os.path.exists(final_name)
    os.unlink(final_name)
コード例 #5
0
def test_temp_file_cleanup_on_exception_with_final_name():
    """If the body raises, neither the scratch file nor the final file is left behind."""
    final_name = tempfile.mktemp()
    try:
        with dumper.temp_file('/tmp', final_name) as (_, tmp_path):
            tmp_path = os.path.join('/tmp', tmp_path)
            raise Exception
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt still
    # propagate; the deliberately raised Exception above is caught either way.
    except Exception:
        pass
    finally:
        assert not os.path.exists(tmp_path)
        assert not os.path.exists(final_name)
コード例 #6
0
ファイル: data_models.py プロジェクト: rcarpa/rucio
    def download(cls, rse, date='latest', cache_dir=DUMPS_CACHE_DIR):
        """
        Download the requested dump for `rse` into `cache_dir` and return
        the path of the cached file.

        `date` is either the string 'latest' or a datetime instance; for
        'latest' the dump date is recovered from the server's
        Content-Disposition filename.

        Raises HTTPDownloadFailed when the server does not answer the
        pre-download HEAD request with status 200.
        """
        logger = logging.getLogger('auditor.data_models')
        session = get_requests_session()

        if date == 'latest':
            url = cls.BASE_URL + cls.URI + '?rse={0}'.format(rse)
            head = session.head(url)
            # Extract the dump date from the advertised download filename.
            # NOTE(review): if no 'filename=' field is present, `date` stays
            # the string 'latest' — confirm the server always sends it.
            for field in head.headers['content-disposition'].split(';'):
                if field.startswith('filename='):
                    date = field.split('=')[1].split('_')[-1].split('.')[0]

        else:
            assert isinstance(date, datetime.datetime)
            date = date.strftime('%d-%m-%Y')  # pylint: disable=no-member
            url = cls.BASE_URL + cls.URI + '?rse={0}&date={1}'.format(rse, date)

        if not os.path.isdir(cache_dir):
            os.mkdir(cache_dir)

        # Cache key: class name, RSE, date and a hash of the exact URL,
        # sanitized to filesystem-safe characters.
        filename = re.sub(
            r'\W', '-',
            '{0}_{1}_{2}_{3}'.format(cls.__name__.lower(), rse, date,
                                     hashlib.sha1(url.encode()).hexdigest()),
        )
        path = os.path.join(cache_dir, filename)

        if os.path.exists(path):
            return path

        logger.debug('Trying to download: "%s"', url)
        response = session.head(url)
        if response.status_code != 200:
            logger.error(
                'Retrieving %s returned %d status code',
                url,
                response.status_code,
            )
            raise HTTPDownloadFailed('Downloading {0} dump'.format(
                cls.__name__),
                                     code=response.status_code)

        # Download through a temp file so the cache entry appears atomically.
        with temp_file(cache_dir, final_name=filename) as (tfile, _):
            http_download_to_file(url, tfile, session=session)

        return path
コード例 #7
0
ファイル: consistency.py プロジェクト: yiiyama/rucio
def parse_and_filter_file(filepath,
                          parser=lambda s: s,
                          filter_=lambda s: s,
                          prefix=None,
                          postfix='parsed',
                          cache_dir=DUMPS_CACHE_DIR):
    '''
    Opens `filepath` as a read-only file, and for each line of the file
    for which the `filter_` function returns True, it writes a version
    parsed with the `parser` function.

    The name of the output file is generated appending '_' + `postfix` to
    the filename in `filepath`. If `prefix` is given it is used instead
    of `filepath`.

    The output file (and temporary files while processing) are stored in
    `cache_dir`.

    Default values for the arguments:
        - `parser`: returns the same string.
        - `filter_`: returns True for any argument.
        - `prefix`: None (the name of the input file is used as prefix).
        - `postfix`: 'parsed'.
        - `cache_dir`: DUMPS_CACHE_DIR.

    The output file is created with a random name and renamed atomically
    when it is complete.

    '\n' is appended to each line, therefore if the input is 'a\nb\n' and `parser`
    is not specified the output will be 'a\n\nb\n\n'
    '''

    prefix = os.path.basename(filepath) if prefix is None else prefix
    output_name = '_'.join((prefix, postfix))
    output_path = os.path.join(cache_dir, output_name)

    # A previous run already produced (and atomically renamed) the output.
    if os.path.exists(output_path):
        return output_path

    with dumper.temp_file(cache_dir, final_name=output_name) as (output, _):
        input_ = dumper.smart_open(filepath)
        # try/finally guarantees the input handle is closed even when
        # `filter_`, `parser` or the write raises (the original leaked it).
        try:
            for line in input_:
                if filter_(line):
                    output.write(parser(line) + '\n')
        finally:
            input_.close()

    return output_path
コード例 #8
0
def download_rse_dump(rse,
                      configuration,
                      date='latest',
                      destdir=DUMPS_CACHE_DIR):
    '''
    Downloads the dump for the given ddmendpoint. If this endpoint does not
    follow the standardized method to publish the dumps it should have an
    entry in the `configuration` object describing how to download the dump.

    `rse` is the DDMEndpoint name.

    `configuration` is a RawConfigParser subclass.

    `date` is a datetime instance with the date of the desired dump or 'latest'
    to download the latest available dump.

    `destdir` is the directory where the dump will be saved (the final component
    in the path is created if it doesn't exist).

    Return value: a tuple with the filename and a datetime instance with
    the date of the dump.
    '''
    logger = logging.getLogger('auditor.srmdumps')
    base_url, url_pattern = generate_url(rse, configuration)
    if date == 'latest':
        logger.debug('Looking for site dumps in: "%s"', base_url)
        links = get_links(base_url)
        url, date = get_newest(base_url, url_pattern, links)
    else:
        url = '{0}/{1}'.format(base_url, date.strftime(url_pattern))

    if not os.path.isdir(destdir):
        os.mkdir(destdir)

    # hashlib.sha1 requires a bytes-like object: encode the URL first.
    # The original passed the str directly, which raises TypeError on
    # Python 3; this also matches the other dump-download helpers.
    filename = '{0}_{1}_{2}_{3}'.format('ddmendpoint', rse,
                                        date.strftime('%d-%m-%Y'),
                                        hashlib.sha1(url.encode()).hexdigest())
    filename = re.sub(r'\W', '-', filename)
    path = os.path.join(destdir, filename)

    if not os.path.exists(path):
        logger.debug('Trying to download: "%s"', url)
        # Download through a temp file so the cache entry appears atomically.
        with temp_file(destdir, final_name=filename) as (f, _):
            download(url, f)

    return (path, date)
コード例 #9
0
    def download(cls, rse, date, cache_dir=DUMPS_CACHE_DIR, buffer_size=65536):
        """
        Download the Rucio replica dump for `rse` on `date` from HDFS,
        concatenate its per-chunk files into a single cached file under
        `cache_dir`, and return the path of that file.

        `buffer_size` is the chunk size (in bytes) used when copying the
        partial dumps into the combined file.
        """
        logger = logging.getLogger('auditor.hdfs')

        if not os.path.isdir(cache_dir):
            os.mkdir(cache_dir)

        url = cls.BASE_URL.format(date.strftime('%Y-%m-%d'), rse)
        filename = '{0}_{1}_{2}_{3}'.format(
            cls.__name__.lower(), rse, date.strftime('%d-%m-%Y'),
            hashlib.sha1(url.encode()).hexdigest())
        filename = re.sub(r'\W', '-', filename)
        path = os.path.join(cache_dir, filename)

        if os.path.exists(path):
            logger.debug('Taking Rucio Replica Dump %s for %s from cache',
                         path, rse)
            return path

        # Create the scratch directory only on a cache miss: the original
        # created it before the cache check and leaked an empty directory
        # on every cache hit.
        tmp_dir = tempfile.mkdtemp(dir=cache_dir)
        try:
            # Use the module-scoped logger (the original mistakenly called
            # the root `logging.debug` here).
            logger.debug('Trying to download: %s for %s', url, rse)

            _hdfs_get(url, tmp_dir)
            # Sort so the chunks are concatenated in a deterministic order.
            files = (os.path.join(tmp_dir, file_)
                     for file_ in sorted(os.listdir(tmp_dir)))

            with temp_file(cache_dir, filename, binary=True) as (full_dump, _):
                for chunk_file in files:
                    with open(chunk_file, 'rb') as partial_dump:
                        while True:
                            data_chunk = partial_dump.read(buffer_size)
                            if not data_chunk:
                                break
                            full_dump.write(data_chunk)
        finally:
            shutil.rmtree(tmp_dir)

        return path