def init(bridge_type):
    """Create a JavaWrapperFactory configured with the Hadoop classpath.

    :param bridge_type: name of the Java bridge implementation to use.
    :return: a ``JavaWrapperFactory`` whose classpath includes Hadoop's jars.
    :raises RuntimeError: if the Hadoop classpath cannot be determined.
    """
    hadoop_classpath = pydoop.hadoop_classpath()
    if hadoop_classpath is None:
        raise RuntimeError('Hadoop classpath not set')
    # FIX: env var names are case-sensitive on POSIX; the rest of this code
    # base sets "CLASSPATH" (upper case), so read that name here, not
    # 'classpath', which would silently fall back to "." on Linux.
    classpath = os.environ.get('CLASSPATH', '.') + ':' + hadoop_classpath
    return JavaWrapperFactory(classpath=classpath, java_bridge_name=bridge_type)
def __init__(self, hadoop_vinfo):
    """Collect Java sources, jar dependencies and property files needed to
    build the pydoop jar for the given Hadoop version.

    :param hadoop_vinfo: pydoop Hadoop version-info object (provides
      ``main``, ``is_cloudera()``, ``is_yarn()``).
    """
    self.hadoop_vinfo = hadoop_vinfo
    self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
    self.classpath = pydoop.hadoop_classpath()
    sources, deps, props = [], [], []
    v2_pipes = (hadoop_vinfo.main >= (2, 0, 0) and
                (not hadoop_vinfo.is_cloudera() or hadoop_vinfo.is_yarn()))
    if v2_pipes:
        # FIXME: kinda hardwired to avro for now
        props.append(
            (os.path.join("it/crs4/pydoop/mapreduce/pipes", PROP_BN), PROP_FN)
        )
        sources.append("src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
        sources += glob.glob('src/v2/it/crs4/pydoop/pipes/*.java')
        sources += glob.glob('src/v2/it/crs4/pydoop/mapreduce/pipes/*.java')
        # for now we have only hadoop2 deps (avro-mapred)
        deps += glob.glob('lib/*.jar')
    else:
        sources.append("src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
        sources += glob.glob('src/v1/org/apache/hadoop/mapred/pipes/*.java')
    self.java_files = sources
    self.dependencies = deps
    self.properties = props
def __init__(self, hadoop_vinfo):
    """Collect Java sources, jar dependencies and property files to build
    for the given Hadoop version.

    :param hadoop_vinfo: pydoop Hadoop version-info object (provides
      ``main``, ``is_cloudera()``, ``is_yarn()``, ``has_mrv2()``).
    """
    self.hadoop_vinfo = hadoop_vinfo
    self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
    self.classpath = pydoop.hadoop_classpath()
    self.java_files = []
    self.dependencies = []
    self.properties = []
    if hadoop_vinfo.main >= (2, 0, 0) and \
       (not hadoop_vinfo.is_cloudera() or hadoop_vinfo.is_yarn()):
        # This version of Hadoop has the v2 pipes API
        # FIXME: kinda hardwired to avro for now
        self.properties.append(
            (os.path.join("it/crs4/pydoop/mapreduce/pipes", PROP_BN), PROP_FN))
        self.java_files.extend(
            glob.glob('src/v2/it/crs4/pydoop/pipes/*.java'))
        self.java_files.extend(
            glob.glob('src/v2/it/crs4/pydoop/mapreduce/pipes/*.java'))
        # for things such as avro-mapreduce
        self.dependencies.extend(glob.glob('lib/*.jar'))
    else:
        # Else we should be dealing with v1 pipes
        self.java_files.extend(
            glob.glob('src/v1/org/apache/hadoop/mapred/pipes/*.java'))
    # NOTE(review): this check is reconstructed at the outer level so that
    # every branch above ends up with a NoSeparatorTextOutputFormat source —
    # confirm the original nesting against upstream history
    if hadoop_vinfo.has_mrv2():
        # If the installation has MRv2 we need to use v2 I/O classes
        self.java_files.extend(
            glob.glob('src/v2/it/crs4/pydoop/mapreduce/lib/output/*.java'))
        self.java_files.extend(
            ["src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"])
    else:
        self.java_files.extend(
            ["src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"])
def init():
    """Export CLASSPATH and LIBHDFS_OPTS so libhdfs can locate Hadoop's
    jars, configuration and native libraries."""
    os.environ["CLASSPATH"] = "%s:%s:%s" % (
        pydoop.hadoop_classpath(), _ORIG_CLASSPATH, pydoop.hadoop_conf()
    )
    # keep any user-provided LIBHDFS_OPTS, then point the JVM at the
    # Hadoop native library directory
    opts = os.getenv("LIBHDFS_OPTS", common.DEFAULT_LIBHDFS_OPTS)
    opts += " -Djava.library.path=%s" % pydoop.hadoop_native()
    os.environ["LIBHDFS_OPTS"] = opts
def __init__(self, hadoop_vinfo):
    """Gather build inputs (Java sources, jar deps, property files) for the
    Hadoop installation described by *hadoop_vinfo*."""
    self.hadoop_vinfo = hadoop_vinfo
    self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
    self.classpath = pydoop.hadoop_classpath()
    self.java_files = []
    self.dependencies = []
    self.properties = []
    use_v2 = (hadoop_vinfo.main >= (2, 0, 0) and
              (hadoop_vinfo.is_yarn() or not hadoop_vinfo.is_cloudera()))
    if use_v2:
        # FIXME: kinda hardwired to avro for now
        prop_path = os.path.join("it/crs4/pydoop/mapreduce/pipes", PROP_BN)
        self.properties.append((prop_path, PROP_FN))
        self.java_files.append(
            "src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
        for pattern in ('src/v2/it/crs4/pydoop/pipes/*.java',
                        'src/v2/it/crs4/pydoop/mapreduce/pipes/*.java'):
            self.java_files.extend(glob.glob(pattern))
        # for now we have only hadoop2 deps (avro-mapred)
        self.dependencies.extend(glob.glob('lib/*.jar'))
    else:
        self.java_files.append(
            "src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
        self.java_files.extend(
            glob.glob('src/v1/org/apache/hadoop/mapred/pipes/*.java'))
def init():
    """Point CLASSPATH at Hadoop's jars and config dir, and make sure
    LIBHDFS_OPTS has at least its default value."""
    parts = (pydoop.hadoop_classpath(), _ORIG_CLASSPATH, pydoop.hadoop_conf())
    os.environ["CLASSPATH"] = "%s:%s:%s" % parts
    # keep a user-provided value if one is already set
    os.environ.setdefault("LIBHDFS_OPTS", common.DEFAULT_LIBHDFS_OPTS)
def _get_java_output_stream(wd):
    """Copy the serializer source into *wd*, compile it against the Hadoop
    classpath and return the output stream of running it.

    :param wd: working directory used for compilation and execution.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    basename = "%s.java" % _HADOOP_SERIALIZE_CLASS
    shutil.copy(os.path.join(here, basename), wd)
    classpath = '.:%s:%s' % (pydoop.hadoop_classpath(), wd)
    compile_java(os.path.join(wd, basename), classpath)
    return get_java_output_stream(_HADOOP_SERIALIZE_CLASS, classpath, [], wd)
def init(bridge_type):
    """Build a JavaWrapperFactory whose classpath includes Hadoop's jars.

    :param bridge_type: name of the Java bridge backend to instantiate.
    :return: a configured ``JavaWrapperFactory``.
    :raises RuntimeError: if the Hadoop classpath cannot be determined.
    """
    hadoop_classpath = pydoop.hadoop_classpath()
    if hadoop_classpath is None:
        raise RuntimeError('Hadoop classpath not set')
    # FIX: read the conventional upper-case CLASSPATH variable; env vars are
    # case-sensitive on POSIX and this code base sets "CLASSPATH", so the
    # original lower-case 'classpath' lookup always fell back to "."
    classpath = os.environ.get('CLASSPATH', '.') + ':' + hadoop_classpath
    return JavaWrapperFactory(
        classpath=classpath, java_bridge_name=bridge_type
    )
def _run_java(self, in_uri, out_uri, wd):
    """Copy the Java source tree into *wd*, compile the opaque round-trip
    helper and run it on the given input/output URIs."""
    here = os.path.abspath(os.path.dirname(__file__))
    shutil.copytree(os.path.join(here, _JAVA_SRC_ROOT),
                    os.path.join(wd, _JAVA_SRC_ROOT))
    classpath = '.:%s:%s:%s' % (wd, pydoop.jar_path(),
                                pydoop.hadoop_classpath())
    utils.compile_java(os.path.join(wd, _OPAQUE_ROUNDTRIP_SRC), classpath)
    utils.run_java(_OPAQUE_ROUNDTRIP_CLASS, classpath, [in_uri, out_uri], wd)
def __init__(self):
    """Record the jar name, classpath, Java sources, jar dependencies and
    property files needed for the build."""
    self.jar_name = pydoop.jar_name()
    self.classpath = pydoop.hadoop_classpath()
    self.java_files = glob.glob("src/it/crs4/pydoop/mapreduce/pipes/*.java")
    self.java_files.append(
        "src/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
    self.dependencies = glob.glob('lib/*.jar')
    prop_key = os.path.join("it/crs4/pydoop/mapreduce/pipes", PROP_BN)
    self.properties = [(prop_key, PROP_FN)]
def __init__(self, hadoop_vinfo, pipes_src_dir):
    """Record build inputs; on security-enabled Hadoop, pull in the patched
    pipes sources from *pipes_src_dir*.

    :param hadoop_vinfo: pydoop Hadoop version-info object.
    :param pipes_src_dir: directory holding the patched pipes sources.
    """
    self.hadoop_vinfo = hadoop_vinfo
    self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
    self.classpath = pydoop.hadoop_classpath()
    self.java_files = ["src/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"]
    if not self.hadoop_vinfo.has_security():
        return
    if hadoop_vinfo.cdh >= (4, 0, 0) and not hadoop_vinfo.ext:
        return  # TODO: add support for mrv2
    # add our fix for https://issues.apache.org/jira/browse/MAPREDUCE-4000
    self.java_files.extend(glob.glob("%s/*" % pipes_src_dir))
def _run_java(self, in_uri, out_uri, wd):
    """Stage the Java sources under *wd*, then compile and execute the
    opaque round-trip class with *in_uri* and *out_uri* as arguments."""
    src_tree = _JAVA_SRC_ROOT
    this_dir = os.path.abspath(os.path.dirname(__file__))
    shutil.copytree(
        os.path.join(this_dir, src_tree), os.path.join(wd, src_tree))
    classpath = '.:%s:%s:%s' % (
        wd, pydoop.jar_path(), pydoop.hadoop_classpath())
    utils.compile_java(os.path.join(wd, _OPAQUE_ROUNDTRIP_SRC), classpath)
    utils.run_java(
        _OPAQUE_ROUNDTRIP_CLASS, classpath, [in_uri, out_uri], wd)
def __init__(self):
    """Collect jar name, classpath, Java sources, dependencies and property
    files for the build."""
    pipes_sources = glob.glob("src/it/crs4/pydoop/mapreduce/pipes/*.java")
    extra_sources = ["src/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"]
    self.jar_name = pydoop.jar_name()
    self.classpath = pydoop.hadoop_classpath()
    self.java_files = pipes_sources + extra_sources
    self.dependencies = glob.glob('lib/*.jar')
    self.properties = [
        (os.path.join("it/crs4/pydoop/mapreduce/pipes", PROP_BN), PROP_FN)
    ]
def main(argv): try: jar_name = argv[1] except IndexError: print "Usage: python %s JAR_NAME" % os.path.basename(argv[0]) return 2 if not os.path.isfile(jar_name): classpath = pydoop.hadoop_classpath() subprocess.check_call("javac -cp %s %s" % (classpath, SRC), shell=True) subprocess.check_call("jar -cvf %s %s" % (jar_name, CLASS), shell=True) return 0
def _get_java_output_stream(wd):
    """Compile the serializer class in *wd*, run it, and return its standard
    output wrapped in a StringIO.

    :param wd: working directory where the ``.java`` source is copied,
      compiled and executed.
    :return: a ``StringIO`` over the Java program's stdout.
    """
    this_directory = os.path.abspath(os.path.dirname(__file__))
    src = os.path.join(this_directory, "%s.java" % _HADOOP_SERIALIZE_CLASS)
    shutil.copy(src, wd)
    classpath = ".:%s:%s" % (pydoop.hadoop_classpath(), wd)
    filename_root = os.path.join(wd, _HADOOP_SERIALIZE_CLASS)
    _compile_java_part(filename_root + ".class", classpath)
    # FIX: os.devnull is portable and the with-block closes the handle
    # (the original leaked an open "/dev/null" file object)
    with open(os.devnull, "w") as devnull:
        output = subprocess.check_output(
            ["java", "-cp", classpath, _HADOOP_SERIALIZE_CLASS],
            cwd=wd, stderr=devnull
        )
    return StringIO(output)
def _get_java_output_stream(wd):
    """Compile and run the serializer class inside *wd*; return its stdout
    as a StringIO stream.

    :param wd: working directory for compilation and execution.
    :return: a ``StringIO`` over the Java program's stdout.
    """
    this_directory = os.path.abspath(os.path.dirname(__file__))
    src = os.path.join(this_directory, "%s.java" % _HADOOP_SERIALIZE_CLASS)
    shutil.copy(src, wd)
    classpath = '.:%s:%s' % (pydoop.hadoop_classpath(), wd)
    filename_root = os.path.join(wd, _HADOOP_SERIALIZE_CLASS)
    _compile_java_part(filename_root + ".class", classpath)
    # FIX: use os.devnull (portable) and close it deterministically; the
    # original left an open '/dev/null' file object behind
    with open(os.devnull, 'w') as devnull:
        output = subprocess.check_output(
            ['java', '-cp', classpath, _HADOOP_SERIALIZE_CLASS],
            cwd=wd, stderr=devnull)
    return StringIO(output)
def __init__(self, hadoop_vinfo):
    """Select v1 or v2 pipes Java sources according to the Hadoop version.

    :param hadoop_vinfo: pydoop Hadoop version-info object (provides
      ``main`` and ``is_yarn()``).
    """
    self.hadoop_vinfo = hadoop_vinfo
    self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
    self.classpath = pydoop.hadoop_classpath()
    if hadoop_vinfo.main >= (2, 0, 0) and hadoop_vinfo.is_yarn():
        files = ["src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"]
        files += glob.glob('src/v2/it/crs4/pydoop/pipes/*.java')
        files += glob.glob('src/v2/it/crs4/pydoop/mapreduce/pipes/*.java')
    else:
        files = ["src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"]
        files += glob.glob('src/v1/org/apache/hadoop/mapred/pipes/*.java')
    self.java_files = files
def __init__(self, hadoop_vinfo):
    """Collect Java sources, jar dependencies and property files to build
    for the given Hadoop version.

    :param hadoop_vinfo: pydoop Hadoop version-info object (provides
      ``main``, ``is_cloudera()``, ``is_yarn()``, ``has_mrv2()``).
    """
    self.hadoop_vinfo = hadoop_vinfo
    self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
    self.classpath = pydoop.hadoop_classpath()
    self.java_files = []
    self.dependencies = []
    self.properties = []
    if hadoop_vinfo.main >= (2, 0, 0) and \
       (not hadoop_vinfo.is_cloudera() or hadoop_vinfo.is_yarn()):
        # This version of Hadoop has the v2 pipes API
        # FIXME: kinda hardwired to avro for now
        self.properties.append((os.path.join(
            "it/crs4/pydoop/mapreduce/pipes", PROP_BN), PROP_FN))
        self.java_files.extend(glob.glob(
            'src/v2/it/crs4/pydoop/pipes/*.java'
        ))
        self.java_files.extend(glob.glob(
            'src/v2/it/crs4/pydoop/mapreduce/pipes/*.java'
        ))
        # for things such as avro-mapreduce
        self.dependencies.extend(glob.glob('lib/*.jar'))
    else:
        # Else we should be dealing with v1 pipes
        self.java_files.extend(glob.glob(
            'src/v1/org/apache/hadoop/mapred/pipes/*.java'
        ))
    # NOTE(review): this check is reconstructed at the outer level so that
    # every branch above ends up with a NoSeparatorTextOutputFormat source —
    # confirm the original nesting against upstream history
    if hadoop_vinfo.has_mrv2():
        # If the installation has MRv2 we need to use v2 I/O classes
        self.java_files.extend(glob.glob(
            'src/v2/it/crs4/pydoop/mapreduce/lib/output/*.java'
        ))
        self.java_files.extend([
            "src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"
        ])
    else:
        self.java_files.extend([
            "src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"
        ])