def setup_paths(module_paths):
    """Set up sys.path on the mappers and reducers.

    module_paths is an array of path names where the sources or other
    supporting files are found. In particular, module_paths[0] is the
    location of the PyCascading Python sources, and module_paths[1] is the
    location of the source file defining the function.

    In Hadoop mode (with remote_deploy.sh), the first two -a options must
    specify the archives of the PyCascading sources and the job sources,
    respectively.

    Arguments:
    module_paths -- the locations of the Python sources
    """
    from com.twitter.pycascading import Util

    cascading_jar = Util.getCascadingJar()
    jython_dir = module_paths[0]
    sys.path.extend((cascading_jar,
                     jython_dir + '/python',
                     jython_dir + '/python/Lib'))
    # Every remaining entry is appended to the search path unchanged
    sys.path.extend(module_paths[1:])

    # Allow importing of user-installed Jython packages
    # Thanks to Simon Radford
    import site
    # The '/' before 'python' is required: without it the directory name is
    # glued onto the last component of jython_dir and site-packages is never
    # found (the sys.path entries above already use the separator).
    site.addsitedir(jython_dir + '/python/Lib/site-packages')
def setup_paths(module_paths):
    """Set up sys.path on the mappers and reducers.

    module_paths is an array of path names where the sources or other
    supporting files are found. In particular, module_paths[0] is the
    location of the PyCascading Python sources, and module_paths[1] is the
    location of the source file defining the function.

    In Hadoop mode (with remote_deploy.sh), the first two -a options must
    specify the archives of the PyCascading sources and the job sources,
    respectively.

    Arguments:
    module_paths -- the locations of the Python sources
    """
    from com.twitter.pycascading import Util

    cascading_jar = Util.getCascadingJar()
    jython_dir = module_paths[0]
    # All Jython-related folders live under <jython_dir>/python
    python_root = jython_dir + '/python'
    sys.path.extend((cascading_jar, python_root, python_root + '/Lib'))
    sys.path.extend(module_paths[1:])

    # Allow importing of user-installed Jython packages
    # Thanks to Simon Radford
    import site
    # Built from python_root so that the path separator between jython_dir
    # and 'python' is present; it was previously missing here, which pointed
    # addsitedir at a wrong directory.
    site.addsitedir(python_root + '/Lib/site-packages')
def load_source(module_name, file_name):
    """Load a module from a Python source file and return it.

    Before the module is loaded, the Cascading jar and the 'python' and
    'python/Lib' folders below the jar folder reported by Util are appended
    to sys.path.

    Arguments:
    module_name -- the name of the variable read the module into
    file_name -- the file that contains the source for the module

    Returns whatever imp.load_source returns for the given name and file.
    """
    from com.twitter.pycascading import Util

    jar_path = Util.getCascadingJar()
    jar_folder = Util.getJarFolder()

    extra_entries = [jar_path]
    for suffix in ('/python', '/python/Lib'):
        extra_entries.append(jar_folder + suffix)
    sys.path.extend(extra_entries)

    # Historical note: an explicit 'import encodings' at this point was
    # reported as necessary for simplejson to work, possibly because Jython
    # tries to import it at startup before sys.path includes Lib. The import
    # is intentionally left disabled:
    #import encodings
    loaded_module = imp.load_source(module_name, file_name)
    return loaded_module
def load_source(module_name, file_name, module_paths):
    """Loads the given module from a Python source file.

    This function is called by PythonFunctionWrapper.prepare(...) after it
    started the Python interpreter to request the given source file to be
    loaded. The function is to be found in this source file.

    module_paths is an array of path names where the sources or other
    supporting files are found. In particular, module_paths[0] is the
    location of the PyCascading Python sources, and module_paths[1] is the
    location of the source file defining the function.

    In Hadoop mode (with remote_deploy.sh), the first two -a options must
    specify the archives of the PyCascading sources and the job sources,
    respectively.

    Arguments:
    module_name -- the name of the variable read the module into
    file_name -- the file that contains the source for the module
    module_paths -- the locations of the Python sources

    Returns the module object produced by imp.load_source.
    """
    # This one should be on the classpath from the job jar or the extracted jar
    from com.twitter.pycascading import Util

    cascading_jar = Util.getCascadingJar()
    jython_dir = module_paths[0]
    sys.path.extend((cascading_jar,
                     jython_dir + '/python',
                     jython_dir + '/python/Lib'))
    sys.path.extend(module_paths[1:])

    # Allow importing of user-installed Jython packages
    import site
    # The '/' before 'python' is required: without it the directory name is
    # glued onto the last component of jython_dir and site-packages is never
    # found (the sys.path entries above already use the separator).
    site.addsitedir(jython_dir + '/python/Lib/site-packages')

    # Haha... it's necessary to put this here, otherwise simplejson won't work.
    # Maybe it's automatically imported in the beginning of a Jython program,
    # but since at that point the sys.path is not set yet to Lib, it will fail?
    #import encodings
    return imp.load_source(module_name, file_name)
""" __author__ = 'Gabor Szabo' import sys, imp if __name__ == "__main__": # The first command line parameter must be 'hadoop' or 'local' # to indicate the running mode running_mode = sys.argv[1] from com.twitter.pycascading import Util cascading_jar = Util.getCascadingJar() # This is the folder where Hadoop extracted the jar file for execution tmp_dir = Util.getJarFolder() # The initial value of sys.path is JYTHONPATH plus whatever Jython appends # to it (normally the Python standard libraries the come with Jython) sys.path.extend((cascading_jar, '.', tmp_dir, tmp_dir + 'python', tmp_dir + 'python/Lib')) # Haha... it's necessary to put this here, otherwise simplejson won't work. # Maybe it's automatically imported in the beginning of a Jython program, # but since at that point the sys.path is not set yet to Lib, it will fail? # Instead, we can use Java's JSON decoder... # import encodings m = imp.load_source('main', sys.argv[2])
# to indicate the running mode running_mode = sys.argv[1] # The second is the location of the PyCascading Python sources in local # mode, and the PyCascading tarball in Hadoop mode python_dir = sys.argv[2].encode() print '=> PYTHON DIR: ' + python_dir # Remove the first two arguments so that sys.argv will look like as # if it was coming from a simple command line execution # The further parameters are the command line parameters to the script sys.argv = sys.argv[3:] from com.twitter.pycascading import Util cascading_jar = Util.getCascadingJar().encode() cascading_hadoop_jar = cascading_jar.replace("-core", "-hadoop") print '=> CASCADING JAR: ' + cascading_jar # This is the folder where Hadoop extracted the jar file for execution tmp_dir = Util.getJarFolder() print 'TMP_DIR: ' + tmp_dir.encode() Util.setPycascadingRoot(python_dir) # The initial value of sys.path is JYTHONPATH plus whatever Jython appends # to it (normally the Python standard libraries the come with Jython) sys.path.extend((cascading_jar, cascading_hadoop_jar, '.', tmp_dir, python_dir + '/python', python_dir + '/python/Lib')) print 'SYS PATH: ' + ', '.join(sys.path) + "\n" # Allow the importing of user-installed Jython packages import site
# The first command line parameter must be 'hadoop' or 'local' # to indicate the running mode running_mode = sys.argv[1] # The second is the location of the PyCascading Python sources in local # mode, and the PyCascading tarball in Hadoop mode python_dir = sys.argv[2] # Remove the first two arguments so that sys.argv will look like as # if it was coming from a simple command line execution # The further parameters are the command line parameters to the script sys.argv = sys.argv[3:] from com.twitter.pycascading import Util cascading_jar = Util.getCascadingJar() # This is the folder where Hadoop extracted the jar file for execution tmp_dir = Util.getJarFolder() Util.setPycascadingRoot(python_dir) # The initial value of sys.path is JYTHONPATH plus whatever Jython appends # to it (normally the Python standard libraries the come with Jython) sys.path.extend((cascading_jar, '.', tmp_dir, python_dir + '/python', python_dir + '/python/Lib')) # Allow the importing of user-installed Jython packages import site site.addsitedir(python_dir + 'python/Lib/site-packages') import os