Esempio n. 1
0
def merge(path, dst):
    """
    Merges the files found under the directory ``path`` into the file ``dst``.

    Both locations are wrapped in a DatasetPath built from the current job
    configuration, and the copy itself is delegated to the source
    DatasetPath's copyTo().  Nothing is returned.
    """
    source = DatasetPath(happy.getJobConf(), path)
    target = DatasetPath(happy.getJobConf(), dst)
    source.copyTo(target)
Esempio n. 2
0
def createCollector(path, fs="dfs", type="text", compressiontype="lzo", sequencetype="BLOCK"):
    """
    Creates a type "text" (default), "sequence" or "bjson" file collector at the specified path.

    Whatever already exists at the path is removed first via DatasetPath.deletePath().
    If a job is currently active (happy.job is not None) the collector is registered
    with it as a closeable, so it is closed together with the job; otherwise the
    caller is responsible for closing it.

    path            -- location of the collector output.
    fs              -- file system name handed to getFileSystem().
    type            -- "text", "sequence" or "bjson".
    compressiontype -- codec name handed to _getCodecInstance(); only used by the
                       "sequence" and "bjson" collectors.
    sequencetype    -- sequence file type name handed to _getSequenceFileType(); only
                       used by the "sequence" and "bjson" collectors.

    Returns the newly created collector.
    Raises Exception("Unknown collector type ...") for any other type; in that case
    nothing at the path is touched.
    """
    # Validate before touching the file system: an unknown type must not cause the
    # existing path to be deleted.  str() keeps the error message usable even
    # when a non-string (e.g. None) is passed.
    if type not in ("text", "sequence", "bjson"):
        raise Exception("Unknown collector type " + str(type))

    filesystem = getFileSystem(fs)
    datasetPath = DatasetPath(filesystem, path)
    datasetPath.deletePath()
    if type == "sequence":
        collector = TextSequenceFileCollector(filesystem, happy.getJobConf(), Path(path),
                                              _getSequenceFileType(sequencetype), _getCodecInstance(compressiontype))
    elif type == "text":
        collector = TextFileCollector(filesystem, happy.getJobConf(), Path(path))
    else:
        # Only "bjson" can reach this branch because of the validation above.
        collector = BJSONCollector(filesystem, happy.getJobConf(), Path(path),
                                   _getSequenceFileType(sequencetype), _getCodecInstance(compressiontype))
    # add as a closeable so that it is closed correctly:
    if happy.job is not None:
        happy.job.addCloseable(collector)
    return collector