Exemple #1
0
 def test_pydoop_jar_path(self):
     """Check where the pydoop jar lives and what it is called.

     Nothing is asserted when ``pydoop.jar_path()`` returns ``None``.
     """
     location = pydoop.jar_path()
     if location is None:
         return
     self.assertTrue(os.path.exists(location))
     # The jar must carry the expected file name and sit directly
     # inside a directory called 'pydoop'.
     parent, base = os.path.split(location)
     self.assertEqual(base, pydoop.jar_name())
     self.assertEqual('pydoop', os.path.basename(parent))
Exemple #2
0
 def test_pydoop_jar_path(self):
   """Verify the reported pydoop jar exists, is named as expected and
   is stored in a directory called 'pydoop'.

   The test is a no-op when ``pydoop.jar_path()`` returns ``None``.
   """
   jar = pydoop.jar_path()
   if jar is None:
     return
   self.assertTrue(os.path.exists(jar))
   # basename/dirname are the two halves of os.path.split
   self.assertEqual(os.path.basename(jar), pydoop.jar_name())
   self.assertEqual('pydoop', os.path.basename(os.path.dirname(jar)))
Exemple #3
0
    def __init__(self, hadoop_vinfo):
        """Work out which Java sources, jars and property files to build.

        ``hadoop_vinfo`` is the Hadoop version descriptor.  Its ``main``
        version tuple and its ``is_cloudera()``, ``is_yarn()`` and
        ``has_mrv2()`` answers decide which source trees are used.
        """
        self.hadoop_vinfo = hadoop_vinfo
        self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
        self.classpath = pydoop.hadoop_classpath()
        self.java_files = []
        self.dependencies = []
        self.properties = []

        has_v2_pipes = hadoop_vinfo.main >= (2, 0, 0) and (
            not hadoop_vinfo.is_cloudera() or hadoop_vinfo.is_yarn()
        )
        if has_v2_pipes:
            # This version of Hadoop has the v2 pipes API
            # FIXME: kinda hardwired to avro for now
            prop_path = os.path.join("it/crs4/pydoop/mapreduce/pipes",
                                     PROP_BN)
            self.properties.append((prop_path, PROP_FN))
            for pattern in ('src/v2/it/crs4/pydoop/pipes/*.java',
                            'src/v2/it/crs4/pydoop/mapreduce/pipes/*.java'):
                self.java_files.extend(glob.glob(pattern))
            # for things such as avro-mapreduce
            self.dependencies.extend(glob.glob('lib/*.jar'))
        else:
            # Else we should be dealing with v1 pipes
            self.java_files.extend(
                glob.glob('src/v1/org/apache/hadoop/mapred/pipes/*.java'))

        if hadoop_vinfo.has_mrv2():
            # If the installation has MRv2 we need to use v2 I/O classes
            self.java_files.extend(
                glob.glob('src/v2/it/crs4/pydoop/mapreduce/lib/output/*.java'))
            self.java_files.append(
                "src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
        else:
            self.java_files.append(
                "src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
Exemple #4
0
 def __init__(self, hadoop_vinfo):
     """Select the Java sources, jars and property files for the build.

     The v2 source tree is used when ``hadoop_vinfo.main`` is at least
     (2, 0, 0) and either ``is_cloudera()`` is false or ``is_yarn()``
     is true; the v1 tree is used otherwise.
     """
     self.hadoop_vinfo = hadoop_vinfo
     self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
     self.classpath = pydoop.hadoop_classpath()
     self.java_files = []
     self.dependencies = []
     self.properties = []
     use_v1 = not (
         hadoop_vinfo.main >= (2, 0, 0)
         and (not hadoop_vinfo.is_cloudera() or hadoop_vinfo.is_yarn())
     )
     if use_v1:
         self.java_files.append(
             "src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
         self.java_files.extend(
             glob.glob('src/v1/org/apache/hadoop/mapred/pipes/*.java'))
     else:
         # FIXME: kinda hardwired to avro for now
         prop_path = os.path.join("it/crs4/pydoop/mapreduce/pipes", PROP_BN)
         self.properties.append((prop_path, PROP_FN))
         self.java_files.append(
             "src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
         for pattern in ('src/v2/it/crs4/pydoop/pipes/*.java',
                         'src/v2/it/crs4/pydoop/mapreduce/pipes/*.java'):
             self.java_files.extend(glob.glob(pattern))
         # for now we have only hadoop2 deps (avro-mapred)
         self.dependencies.extend(glob.glob('lib/*.jar'))
Exemple #5
0
 def __init__(self, hadoop_vinfo):
     """Gather build inputs (Java sources, jar deps, property files).

     Each branch first picks what to add for the detected Hadoop
     flavour; the additions are then applied in one place.
     """
     self.hadoop_vinfo = hadoop_vinfo
     self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
     self.classpath = pydoop.hadoop_classpath()
     self.java_files = []
     self.dependencies = []
     self.properties = []
     v2_pipes = hadoop_vinfo.main >= (2, 0, 0) and (
         not hadoop_vinfo.is_cloudera() or hadoop_vinfo.is_yarn())
     if v2_pipes:
         # FIXME: kinda hardwired to avro for now
         self.properties.append(
             (os.path.join("it/crs4/pydoop/mapreduce/pipes", PROP_BN),
              PROP_FN))
         text_output_format = (
             "src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
         source_globs = ('src/v2/it/crs4/pydoop/pipes/*.java',
                         'src/v2/it/crs4/pydoop/mapreduce/pipes/*.java')
         # for now we have only hadoop2 deps (avro-mapred)
         dependency_globs = ('lib/*.jar',)
     else:
         text_output_format = (
             "src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
         source_globs = ('src/v1/org/apache/hadoop/mapred/pipes/*.java',)
         dependency_globs = ()
     self.java_files.append(text_output_format)
     for pattern in source_globs:
         self.java_files.extend(glob.glob(pattern))
     for pattern in dependency_globs:
         self.dependencies.extend(glob.glob(pattern))
Exemple #6
0
 def __init__(self, hadoop_vinfo, pipes_src_dir):
   """Set up the jar name, classpath and list of Java sources.

   The text output format source is always included.  Everything found
   in ``pipes_src_dir`` is added only when ``has_security()`` is true
   and the CDH >= 4 / no-``ext`` case does not apply.
   """
   self.hadoop_vinfo = hadoop_vinfo
   self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
   self.classpath = pydoop.hadoop_classpath()
   self.java_files = ["src/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"]
   if not self.hadoop_vinfo.has_security():
     return
   if hadoop_vinfo.cdh >= (4, 0, 0) and not hadoop_vinfo.ext:
     return  # TODO: add support for mrv2
   # add our fix for https://issues.apache.org/jira/browse/MAPREDUCE-4000
   patched_sources = glob.glob("%s/*" % pipes_src_dir)
   self.java_files.extend(patched_sources)
Exemple #7
0
 def __init__(self, hadoop_vinfo, pipes_src_dir):
   """Record the jar name, classpath and Java sources to compile.

   Starts from the text output format source.  The contents of
   ``pipes_src_dir`` are appended when ``has_security()`` is true,
   except when ``cdh`` is at least (4, 0, 0) and ``ext`` is falsy.
   """
   self.hadoop_vinfo = hadoop_vinfo
   self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
   self.classpath = pydoop.hadoop_classpath()
   self.java_files = ["src/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"]
   # TODO: add support for mrv2 (the CDH >= 4, no-ext case is skipped)
   wants_pipes_fix = self.hadoop_vinfo.has_security() and not (
     hadoop_vinfo.cdh >= (4, 0, 0) and not hadoop_vinfo.ext
   )
   if wants_pipes_fix:
     # add our fix for https://issues.apache.org/jira/browse/MAPREDUCE-4000
     self.java_files.extend(glob.glob("%s/*" % pipes_src_dir))
Exemple #8
0
 def __init__(self):
     """Collect the jar name, classpath, Java sources, jar dependencies
     and property files used for the build."""
     self.jar_name = pydoop.jar_name()
     self.classpath = pydoop.hadoop_classpath()
     # All pipes sources, followed by the text output format class.
     sources = glob.glob("src/it/crs4/pydoop/mapreduce/pipes/*.java")
     sources.append("src/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
     self.java_files = sources
     self.dependencies = glob.glob('lib/*.jar')
     prop_path = os.path.join("it/crs4/pydoop/mapreduce/pipes", PROP_BN)
     self.properties = [(prop_path, PROP_FN)]
Exemple #9
0
 def __init__(self):
     """Initialise the build inputs.

     Sets ``jar_name``, ``classpath``, ``java_files`` (globbed pipes
     sources plus the text output format class), ``dependencies``
     (jars under ``lib``) and ``properties`` (one path/name pair).
     """
     self.jar_name = pydoop.jar_name()
     self.classpath = pydoop.hadoop_classpath()
     pipes_sources = glob.glob("src/it/crs4/pydoop/mapreduce/pipes/*.java")
     self.java_files = pipes_sources + [
         "src/it/crs4/pydoop/NoSeparatorTextOutputFormat.java",
     ]
     self.dependencies = glob.glob('lib/*.jar')
     prop_entry = (
         os.path.join("it/crs4/pydoop/mapreduce/pipes", PROP_BN),
         PROP_FN,
     )
     self.properties = [prop_entry]
Exemple #10
0
 def __init__(self, hadoop_vinfo):
     """Pick the Java sources to compile for the given Hadoop version.

     The v2 sources are used when ``hadoop_vinfo.main`` is at least
     (2, 0, 0) and ``is_yarn()`` is true; the v1 sources otherwise.
     """
     self.hadoop_vinfo = hadoop_vinfo
     self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
     self.classpath = pydoop.hadoop_classpath()
     self.java_files = []
     # The v2 tree requires both a 2.x main version and YARN.
     wants_v2 = hadoop_vinfo.main >= (2, 0, 0) and hadoop_vinfo.is_yarn()
     if wants_v2:
         self.java_files.append(
             "src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
         for pattern in ('src/v2/it/crs4/pydoop/pipes/*.java',
                         'src/v2/it/crs4/pydoop/mapreduce/pipes/*.java'):
             self.java_files.extend(glob.glob(pattern))
     else:
         self.java_files.append(
             "src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
         self.java_files.extend(
             glob.glob('src/v1/org/apache/hadoop/mapred/pipes/*.java'))
Exemple #11
0
    def __init__(self, hadoop_vinfo):
        """Decide what goes into the jar for the given Hadoop version.

        Two independent choices are made from ``hadoop_vinfo``: which
        pipes sources to compile (v2 or v1), and which I/O classes to
        compile, depending on ``has_mrv2()``.
        """
        self.hadoop_vinfo = hadoop_vinfo
        self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
        self.classpath = pydoop.hadoop_classpath()
        self.java_files = []
        self.dependencies = []
        self.properties = []
        sources = self.java_files  # alias: both names are the same list

        v2_pipes_api = hadoop_vinfo.main >= (2, 0, 0) and (
            not hadoop_vinfo.is_cloudera() or hadoop_vinfo.is_yarn())
        if v2_pipes_api:
            # This version of Hadoop has the v2 pipes API
            # FIXME: kinda hardwired to avro for now
            prop_path = os.path.join(
                "it/crs4/pydoop/mapreduce/pipes", PROP_BN)
            self.properties.append((prop_path, PROP_FN))
            pipes_globs = (
                'src/v2/it/crs4/pydoop/pipes/*.java',
                'src/v2/it/crs4/pydoop/mapreduce/pipes/*.java',
            )
            for pattern in pipes_globs:
                sources.extend(glob.glob(pattern))
            # for things such as avro-mapreduce
            self.dependencies.extend(glob.glob('lib/*.jar'))
        else:
            # Else we should be dealing with v1 pipes
            sources.extend(
                glob.glob('src/v1/org/apache/hadoop/mapred/pipes/*.java'))

        if hadoop_vinfo.has_mrv2():
            # If the installation has MRv2 we need to use v2 I/O classes
            sources.extend(
                glob.glob('src/v2/it/crs4/pydoop/mapreduce/lib/output/*.java'))
            sources.append(
                "src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
        else:
            sources.append(
                "src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")