예제 #1
0
def write(filename, content):
    """
    Write text to a file on the DFS

    The text is echoed by the shell and piped into
    '<mapreduce program> dfs -put - <filename>'.

    :Parameters:
        filename : string
            File name where to write the text on the DFS. It is inserted
            into the command line unchanged (no shell quoting is applied).
        content : string or list of two-item tuples
            If it is a string, the text is just written as it is.
            If it is a list of tuples, each tuple is written as a MapReduce
            entry (key, value) on its own line, key and value being
            separated by a tab. A value that is neither a string nor a
            list (e.g. a single tuple) is treated as a list of one entry.

    :Examples:
        write('foo', 'String of text')
        write('foo', (0, 0))
        write('foo', [(0, 1), (1, 1)])
    """
    # Imported here so the function does not rely on the module header.
    # shlex.quote exists on Python 3.3+; pipes.quote is the older spelling.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    if not isinstance(content, str):
        if not isinstance(content, list):
            content = [content]
        # Only the first two items of each entry are used: key and value.
        content = '\n'.join(
            str(item[0]) + '\t' + str(item[1]) for item in content
        )
    options = {
        # Shell-quote the text: inside the former hand-written double quotes
        # any ", $, backtick or backslash in the text was interpreted by the
        # shell, so the text was not written "as it is" and could even run
        # commands.
        'content': quote(content),
        'mapreduce': config.mapreduce_program,
        'filename': filename,
    }
    # No quotes around %(content)s in the template: quote() supplies them.
    # NOTE(review): assumes prince.run_program fills the template with
    # '%' formatting and runs it through a shell (the pipe requires one).
    prince.run_program(
        'echo %(content)s | %(mapreduce)s dfs -put - %(filename)s', options)
예제 #2
0
파일: dfs.py 프로젝트: goossaert/prince
def write(filename, content):
    """
    Write text to a file on the DFS

    The text is echoed by the shell and piped into
    '<mapreduce program> dfs -put - <filename>'.

    :Parameters:
        filename : string
            File name where to write the text on the DFS. It is inserted
            into the command line unchanged (no shell quoting is applied).
        content : string or list of two-item tuples
            If it is a string, the text is just written as it is.
            If it is a list of tuples, each tuple is written as a MapReduce
            entry (key, value) on its own line, key and value being
            separated by a tab. A value that is neither a string nor a
            list (e.g. a single tuple) is treated as a list of one entry.

    :Examples:
        write('foo', 'String of text')
        write('foo', (0, 0))
        write('foo', [(0, 1), (1, 1)])
    """
    # Imported here so the function does not rely on the module header.
    # shlex.quote exists on Python 3.3+; pipes.quote is the older spelling.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    if not isinstance(content, str):
        if not isinstance(content, list):
            content = [content]
        # Only the first two items of each entry are used: key and value.
        content = '\n'.join(
            str(item[0]) + '\t' + str(item[1]) for item in content
        )
    # Shell-quote the text: inside the former hand-written double quotes any
    # ", $, backtick or backslash in the text was interpreted by the shell,
    # so the text was not written "as it is" and could even run commands.
    options = {'content':   quote(content),
               'mapreduce': config.mapreduce_program,
               'filename':  filename}
    # No quotes around %(content)s in the template: quote() supplies them.
    # NOTE(review): assumes prince.run_program fills the template with
    # '%' formatting and runs it through a shell (the pipe requires one).
    prince.run_program(
        'echo %(content)s | %(mapreduce)s dfs -put - %(filename)s', options)
예제 #3
0
def read(filenames, first=None, last=None):
    """
    Read files from the DFS with 'dfs -cat', optionally keeping only the
    first or the last n lines of the output.

    'first' and 'last' are mutually exclusive: when both are given, only
    'first' is applied. A value that evaluates to false (None or 0) means
    that the corresponding limit is not applied.

    :Parameters:
        filenames : string or list of strings
            Files to read from on the DFS. A value that is not a list is
            treated as a single file name.
        first : int
            Number of lines to keep from the beginning of the output
            (passed to 'head -n').
        last : int
            Number of lines to keep from the end of the output
            (passed to 'tail -n').

    :Return:
        The result of prince.run_program for the command; per the original
        documentation, the lines of the file(s) on the DFS.

    :ReturnType:
        List of strings (as documented by the original author).
    """
    if isinstance(filenames, list):
        names = filenames
    else:
        names = [filenames]

    substitutions = {
        'mapreduce': config.mapreduce_program,
        'filenames': ' '.join(names),
    }

    # The optional head/tail stage is appended to the command template.
    command = '%(mapreduce)s dfs -cat %(filenames)s'
    if first:
        command += ' | head -n %s' % first
    elif last:
        command += ' | tail -n %s' % last
    return prince.run_program(command, substitutions)
예제 #4
0
파일: dfs.py 프로젝트: goossaert/prince
def read(filenames, first=None, last=None):
    """
    Read files from the DFS with 'dfs -cat', optionally keeping only the
    first or the last n lines of the output.

    'first' and 'last' are mutually exclusive: when both are given, only
    'first' is applied. A value that evaluates to false (None or 0) means
    that the corresponding limit is not applied.

    :Parameters:
        filenames : string or list of strings
            Files to read from on the DFS. A value that is not a list is
            treated as a single file name.
        first : int
            Number of lines to keep from the beginning of the output
            (passed to 'head -n').
        last : int
            Number of lines to keep from the end of the output
            (passed to 'tail -n').

    :Return:
        The result of prince.run_program for the command; per the original
        documentation, the lines of the file(s) on the DFS.

    :ReturnType:
        List of strings (as documented by the original author).
    """
    names = filenames if isinstance(filenames, list) else [filenames]
    substitutions = {
        'mapreduce': config.mapreduce_program,
        'filenames': ' '.join(names),
    }

    # Pick the optional pipeline stage; 'first' wins over 'last'.
    if first:
        limiter = ' | head -n %s' % first
    elif last:
        limiter = ' | tail -n %s' % last
    else:
        limiter = ''

    return prince.run_program(
        '%(mapreduce)s dfs -cat %(filenames)s' + limiter, substitutions)
예제 #5
0
def exists(path):
    """
    Tell whether a path exists on the DFS.

    The check runs 'dfs -ls' on the path and reports whether
    prince.run_program returned a non-empty (truthy) result.

    NOTE (from the original author): being based on 'dfs -ls', this is
          *very* slow. 'dfs -test -e' is not used because its
          implementation in Hadoop 0.20.1 is buggy.

    :Parameters:
        path : string
            Path whose existence on the DFS has to be tested.

    :Return:
        True if the listing is non-empty, False otherwise

    :ReturnType:
        Boolean
    """
    substitutions = {
        'mapreduce': config.mapreduce_program,
        'path': path,
    }
    listing = prince.run_program('%(mapreduce)s dfs -ls %(path)s',
                                 substitutions)
    return bool(listing)
예제 #6
0
파일: dfs.py 프로젝트: goossaert/prince
def exists(path):
    """
    Tell whether a path exists on the DFS.

    The check runs 'dfs -ls' on the path and reports whether
    prince.run_program returned a non-empty (truthy) result.

    NOTE (from the original author): being based on 'dfs -ls', this is
          *very* slow. 'dfs -test -e' is not used because its
          implementation in Hadoop 0.20.1 is buggy.

    :Parameters:
        path : string
            Path whose existence on the DFS has to be tested.

    :Return:
        True if the listing is non-empty, False otherwise

    :ReturnType:
        Boolean
    """
    command = '%(mapreduce)s dfs -ls %(path)s'
    substitutions = {'mapreduce': config.mapreduce_program, 'path': path}
    # Any truthy result from the listing counts as "the path exists".
    if prince.run_program(command, substitutions):
        return True
    return False