Example #1
def init():
    os.environ["CLASSPATH"] = "%s:%s:%s" % (
        pydoop.hadoop_classpath(), _ORIG_CLASSPATH, pydoop.hadoop_conf()
    )
    os.environ["LIBHDFS_OPTS"] = os.getenv(
        "LIBHDFS_OPTS", common.DEFAULT_LIBHDFS_OPTS
    ) + " -Djava.library.path=%s" % pydoop.hadoop_native()
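A minimal usage sketch (hypothetical driver code: it assumes the module defining init(), _ORIG_CLASSPATH and common is importable, and that a Hadoop installation is discoverable by pydoop):

import os

init()
# CLASSPATH now lists the Hadoop jars first, then the original classpath,
# then the configuration directory:
print(os.environ["CLASSPATH"])
# LIBHDFS_OPTS ends with the JVM flag pointing at Hadoop's native libs:
print(os.environ["LIBHDFS_OPTS"])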
Example #2
def get_task_trackers(properties=None, hadoop_conf_dir=None, offline=False):
    """
    Get the list of task trackers in the Hadoop cluster.

    Each element in the returned list is in the ``(host, port)`` format.
    ``properties`` is passed to :func:`run_cmd`.

    If ``offline`` is True, try getting the list of task trackers from
    the 'slaves' file in Hadoop's configuration directory (no attempt is
    made to contact the Hadoop daemons).  In this case, ports are set to 0.
    """
    if offline:
        if not hadoop_conf_dir:
            hadoop_conf_dir = pydoop.hadoop_conf()
        # the "slaves" path must be computed even when a conf dir is given
        slaves = os.path.join(hadoop_conf_dir, "slaves")
        try:
            with open(slaves) as f:
                task_trackers = [(l.strip(), 0) for l in f]
        except IOError:
            task_trackers = []
    else:
        stdout = run_cmd("job", ["-list-active-trackers"],
                         properties=properties,
                         hadoop_conf_dir=hadoop_conf_dir)
        task_trackers = []
        for l in stdout.splitlines():
            if not l:
                continue
            # each line looks like "tracker_<host>:<name>/<ip>:<port>"
            l = l.split(":")
            task_trackers.append((l[0].split("_")[1], int(l[-1])))
    return task_trackers
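A hypothetical call site (the fallback logic below is illustrative, not part of the function's contract):

# Query the live cluster; if nothing comes back, fall back to the
# static "slaves" file without contacting any daemon.
trackers = get_task_trackers()
if not trackers:
    trackers = get_task_trackers(offline=True)
for host, port in trackers:
    print("%s:%d" % (host, port))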
Example #3
File: hadut.py Project: crs4/pydoop
def get_task_trackers(properties=None, hadoop_conf_dir=None, offline=False):
    """
    Get the list of task trackers in the Hadoop cluster.

    Each element in the returned list is in the ``(host, port)`` format.
    All arguments are passed to :func:`run_class`.

    If ``offline`` is :obj:`True`, try getting the list of task trackers from
    the ``slaves`` file in Hadoop's configuration directory (no attempt is
    made to contact the Hadoop daemons).  In this case, ports are set to 0.
    """
    if offline:
        if not hadoop_conf_dir:
            hadoop_conf_dir = pydoop.hadoop_conf()
        # compute the "slaves" path even when a conf dir is given
        slaves = os.path.join(hadoop_conf_dir, "slaves")
        try:
            with open(slaves) as f:
                task_trackers = [(l.strip(), 0) for l in f]
        except IOError:
            task_trackers = []
    else:
        # run JobClient directly (avoids "hadoop job" deprecation)
        stdout = run_class(
            "org.apache.hadoop.mapred.JobClient", ["-list-active-trackers"],
            properties=properties, hadoop_conf_dir=hadoop_conf_dir,
            keep_streams=True
        )
        task_trackers = []
        for line in stdout.splitlines():
            if not line:
                continue
            line = line.split(":")
            task_trackers.append((line[0].split("_")[1], int(line[-1])))
    return task_trackers
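For reference, this is how the loop above decomposes one line of -list-active-trackers output (the hostname is made up; the tracker_<host>:<name>/<ip>:<port> shape is the old JobClient output format):

line = "tracker_node1.example.com:localhost/127.0.0.1:50060"
parts = line.split(":")            # split on every colon
host = parts[0].split("_")[1]      # 'node1.example.com'
port = int(parts[-1])              # 50060
assert (host, port) == ("node1.example.com", 50060)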
Example #4
def get_task_trackers(properties=None, hadoop_conf_dir=None, offline=False):
    """
    Get the list of task trackers in the Hadoop cluster.

    Each element in the returned list is in the ``(host, port)`` format.
    All arguments are passed to :func:`run_class`.

    If ``offline`` is :obj:`True`, try getting the list of task trackers from
    the ``slaves`` file in Hadoop's configuration directory (no attempt is
    made to contact the Hadoop daemons).  In this case, ports are set to 0.
    """
    if offline:
        if not hadoop_conf_dir:
            hadoop_conf_dir = pydoop.hadoop_conf()
        # compute the "slaves" path even when a conf dir is given
        slaves = os.path.join(hadoop_conf_dir, "slaves")
        try:
            with open(slaves) as f:
                task_trackers = [(l.strip(), 0) for l in f]
        except IOError:
            task_trackers = []
    else:
        # run JobClient directly (avoids "hadoop job" deprecation)
        stdout = run_class("org.apache.hadoop.mapred.JobClient",
                           ["-list-active-trackers"],
                           properties=properties,
                           hadoop_conf_dir=hadoop_conf_dir,
                           keep_streams=True)
        task_trackers = []
        for l in stdout.splitlines():
            if not l:
                continue
            l = l.split(":")
            task_trackers.append((l[0].split("_")[1], int(l[-1])))
    return task_trackers
Example #5
def init():
  os.environ["CLASSPATH"] = "%s:%s:%s" % (
    pydoop.hadoop_classpath(), _ORIG_CLASSPATH, pydoop.hadoop_conf()
    )
  os.environ["LIBHDFS_OPTS"] = os.getenv(
    "LIBHDFS_OPTS", common.DEFAULT_LIBHDFS_OPTS
    )
Example #6
def get_task_trackers(properties=None, hadoop_conf_dir=None, offline=False):
  """
  Get the list of task trackers in the Hadoop cluster.

  Each element in the returned list is in the ``(host, port)`` format.
  ``properties`` is passed to :func:`run_cmd`.

  If ``offline`` is True, try getting the list of task trackers from
  the 'slaves' file in Hadoop's configuration directory (no attempt is
  made to contact the Hadoop daemons).  In this case, ports are set to 0.
  """
  if offline:
    if not hadoop_conf_dir:
      hadoop_conf_dir = pydoop.hadoop_conf()
    # compute the "slaves" path even when a conf dir is given
    slaves = os.path.join(hadoop_conf_dir, "slaves")
    try:
      with open(slaves) as f:
        task_trackers = [(l.strip(), 0) for l in f]
    except IOError:
      task_trackers = []
  else:
    stdout = run_cmd("job", ["-list-active-trackers"],
                     properties=properties, hadoop_conf_dir=hadoop_conf_dir)
    task_trackers = []
    for l in stdout.splitlines():
      if not l:
        continue
      l = l.split(":")
      task_trackers.append((l[0].split("_")[1], int(l[-1])))
  return task_trackers
Example #7
def test_conf(self):
    os.environ['HADOOP_CONF_DIR'] = self.wd
    # silence Hadoop 3 warning
    with open(os.path.join(self.wd, 'log4j.properties'), 'w'):
        pass
    reload(pydoop)
    self.assertEqual(pydoop.hadoop_conf(), self.wd)
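Note that the builtin reload() used here exists only on Python 2; a Python 3 version of this test would need one extra import (sketch):

import pydoop
from importlib import reload  # Python 3: reload is no longer a builtin

reload(pydoop)  # re-evaluate pydoop's Hadoop environment detection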
Example #8
import sys
import os
import random
import uuid
import tempfile
import imp
import unittest
import shutil
import warnings

import pydoop


_HADOOP_HOME = pydoop.hadoop_home()
_HADOOP_CONF_DIR = pydoop.hadoop_conf()
_RANDOM_DATA_SIZE = 32
_DEFAULT_HDFS_HOST = "localhost"
_DEFAULT_HDFS_PORT = 8020 if pydoop.is_cloudera() else 9000
_DEFAULT_BYTES_PER_CHECKSUM = 512
HDFS_HOST = os.getenv("HDFS_HOST", _DEFAULT_HDFS_HOST)
HDFS_PORT = os.getenv("HDFS_PORT", _DEFAULT_HDFS_PORT)
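A sketch of how these constants are typically consumed by the HDFS test fixtures (the connection call uses pydoop's hdfs.hdfs client class; an actual connection of course needs a running namenode):

import pydoop.hdfs as hdfs

fs = hdfs.hdfs(host=HDFS_HOST, port=int(HDFS_PORT))  # env values are strings
print(fs.working_directory())
fs.close()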


def _get_special_chr():
    """
    This is used to check unicode support.  On some systems, depending
    on locale settings, we won't be able to use non-ASCII characters
    when interacting with system calls.  Since in such cases it
    doesn't really make sense to run these tests, we set UNI_CHR to a
    regular ASCII character.
Example #9
def test_conf(self):
    # dict.has_key() is Python-2-only; the "in" operator works everywhere
    if 'HADOOP_CONF_DIR' in os.environ:
        self.assertEqual(os.environ['HADOOP_CONF_DIR'], pydoop.hadoop_conf())
Example #10
def test_conf(self):
    # dict.has_key() is Python-2-only; the "in" operator works everywhere
    if 'HADOOP_CONF_DIR' in os.environ:
        self.assertEqual(os.environ['HADOOP_CONF_DIR'],
                         pydoop.hadoop_conf())
Example #11
def test_conf(self):
    os.environ['HADOOP_CONF_DIR'] = self.wd
    reload(pydoop)
    self.assertEqual(pydoop.hadoop_conf(), self.wd)