Example #1
 def good(self):
     base_path = make_random_str()
     for path in base_path, base_path + UNI_CHR:
         hdfs.dump("foo\n", path)
         self.assertTrue(hdfs.path.exists(path))
         hdfs.rmr(path)
         self.assertFalse(hdfs.path.exists(path))
Example #2
 def dump(self):
   for test_path in self.hdfs_paths[0], self.local_paths[0]:
     hdfs.dump(self.data, test_path)
     with hdfs.open(test_path) as fi:
       rdata = fi.read()
     fi.fs.close()
     self.assertEqual(rdata, self.data)
Example #3
File: submit.py Project: kikkomep/pydoop
    def __setup_remote_paths(self):
        """
        Actually create the working directory and copy the module into it.

        Note: the script has to be readable by Hadoop; though this may not
        generally be a problem on HDFS, where the Hadoop user is usually
        the superuser, things may be different if our working directory is
        on a shared POSIX filesystem.  Therefore, we make the directory
        and the script accessible by all.
        """
        self.logger.debug("remote_wd: %s", self.remote_wd)
        self.logger.debug("remote_exe: %s", self.remote_exe)
        self.logger.debug("remotes: %s", self.files_to_upload)
        if self.args.module:
            self.logger.debug(
                'Generated pipes_code:\n\n %s', self._generate_pipes_code()
            )
        if not self.args.pretend:
            hdfs.mkdir(self.remote_wd)
            hdfs.chmod(self.remote_wd, "a+rx")
            self.logger.debug("created and chmod-ed: %s", self.remote_wd)
            pipes_code = self._generate_pipes_code()
            hdfs.dump(pipes_code, self.remote_exe)
            self.logger.debug("dumped pipes_code to: %s", self.remote_exe)
            hdfs.chmod(self.remote_exe, "a+rx")
            self.__warn_user_if_wd_maybe_unreadable(self.remote_wd)
            for (l, h, _) in self.files_to_upload:
                self.logger.debug("uploading: %s to %s", l, h)
                hdfs.cp(l, h)
        self.logger.debug("Created%sremote paths:" %
                          (' [simulation] ' if self.args.pretend else ' '))
Example #4
 def dump(self):
     for test_path in self.hdfs_paths[0], self.local_paths[0]:
         hdfs.dump(self.data, test_path, mode="wb")
         with hdfs.open(test_path) as fi:
             rdata = fi.read()
         fi.fs.close()
         self.assertEqual(rdata, self.data)
Example #5
 def good(self):
     base_path = make_random_str()
     for path in base_path, base_path + UNI_CHR:
         hdfs.dump("foo\n", path)
         self.assertTrue(hdfs.path.exists(path))
         hdfs.rmr(path)
         self.assertFalse(hdfs.path.exists(path))
Example #6
def write_output(k, args, logger):
    logger.debug("kinship matrix: shape=%r" % (k.shape, ))
    logger.info("serializing output")
    s = KinshipBuilder.serialize(k)
    logger.debug("serialized matrix: %d bytes" % len(s))
    logger.info("writing output to %r" % (args.output, ))
    hdfs.dump(s, args.output, user=args.hdfs_user)
Example #7
File: HdfsIO.py Project: yuyiguo/WMArchive
    def _write(self, data):
        "Internal Write API"
        schema = self.schema
        wmaid = self.wmaid(data)
        year, month, _ = today()
        hdir = '%s/%s/%s' % (self.hdir, year, month)
        if not hdfs.path.isdir(hdir):
            hdfs.mkdir(hdir)
        fname = file_name(hdir, wmaid, self.compress)

        # create Avro writer and binary encoder
        writer = avro.io.DatumWriter(schema)
        bytes_writer = io.BytesIO()

        if self.compress:
            # use gzip'ed writer with BytesIO file object
            gzip_writer = gzip.GzipFile(fileobj=bytes_writer, mode='wb')
            encoder = avro.io.BinaryEncoder(gzip_writer)
        else:
            # plain binary encoder
            encoder = avro.io.BinaryEncoder(bytes_writer)

        # write records from given data stream to binary writer
        writer.write(data, encoder)

        # close gzip stream if necessary
        if self.compress:
            gzip_writer.flush()
            gzip_writer.close()

        # store raw data to hadoop via HDFS
        hdfs.dump(bytes_writer.getvalue(), fname)

        # close bytes stream
        bytes_writer.close()
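
For reference, the bytes stored by _write() above can be read back with a matching decode step. The following is a minimal sketch under stated assumptions (the helper name read_record, the single-record layout, and the gzip framing are inferred from _write() itself and are not part of WMArchive):

import io
import gzip

import avro.io
import pydoop.hdfs as hdfs


def read_record(fname, schema, compress=False):
    # hypothetical counterpart to _write(): load the raw bytes back from
    # HDFS and decode a single Avro record with the writer's schema
    raw = hdfs.load(fname, mode="rb")
    stream = io.BytesIO(raw)
    if compress:
        # assumes the gzip framing produced by _write() when compression is on
        stream = gzip.GzipFile(fileobj=stream, mode="rb")
    decoder = avro.io.BinaryDecoder(stream)
    reader = avro.io.DatumReader(schema)
    return reader.read(decoder)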
Example #8
    def __setup_remote_paths(self):
        """
        Actually create the working directory and copy the module into it.

        Note: the script has to be readable by Hadoop; though this may not
        generally be a problem on HDFS, where the Hadoop user is usually
        the superuser, things may be different if our working directory is
        on a shared POSIX filesystem.  Therefore, we make the directory
        and the script accessible by all.
        """
        self.logger.debug("remote_wd: %s", self.remote_wd)
        self.logger.debug("remote_exe: %s", self.remote_exe)
        self.logger.debug("remotes: %s", self.files_to_upload)
        if self.args.module:
            self.logger.debug('Generated pipes_code:\n\n %s',
                              self._generate_pipes_code())
        if not self.args.pretend:
            hdfs.mkdir(self.remote_wd)
            hdfs.chmod(self.remote_wd, "a+rx")
            self.logger.debug("created and chmod-ed: %s", self.remote_wd)
            pipes_code = self._generate_pipes_code()
            hdfs.dump(pipes_code, self.remote_exe)
            self.logger.debug("dumped pipes_code to: %s", self.remote_exe)
            hdfs.chmod(self.remote_exe, "a+rx")
            self.__warn_user_if_wd_maybe_unreadable(self.remote_wd)
            for (l, h, _) in self.files_to_upload:
                self.logger.debug("uploading: %s to %s", l, h)
                hdfs.cp(l, h)
        self.logger.debug("Created%sremote paths:" %
                          (' [simulation] ' if self.args.pretend else ' '))
Example #9
    def _write(self, data):
        "Internal Write API"
        schema = self.schema
        wmaid = self.wmaid(data)
        year, month, _ = today()
        hdir = '%s/%s/%s' % (self.hdir, year, month)
        if not hdfs.path.isdir(hdir):
            hdfs.mkdir(hdir)
        fname = file_name(hdir, wmaid, self.compress)

        # create Avro writer and binary encoder
        writer = avro.io.DatumWriter(schema)
        bytes_writer = io.BytesIO()

        if self.compress:
            # use gzip'ed writer with BytesIO file object
            gzip_writer = gzip.GzipFile(fileobj=bytes_writer, mode='wb')
            encoder = avro.io.BinaryEncoder(gzip_writer)
        else:
            # plain binary encoder
            encoder = avro.io.BinaryEncoder(bytes_writer)

        # write records from given data stream to binary writer
        writer.write(data, encoder)

        # close gzip stream if necessary
        if self.compress:
            gzip_writer.flush()
            gzip_writer.close()

        # store raw data to hadoop via HDFS
        hdfs.dump(bytes_writer.getvalue(), fname)

        # close bytes stream
        bytes_writer.close()
Example #10
def write(writeFlag):
    if writeFlag:
        # instantiate hadoop
        hdfs.hdfs()

        targetPath = config.targetPath
        targetDirectory = config.targetDirectory
        sourceFile = config.sourceFile

        print("Target Path: " + targetPath)
        print("Target Directory: " + targetDirectory)
        print("Source Path: " + sourceFile)

        dumpFile = open(sourceFile, "r")
        fullText = dumpFile.read()
        dumpFile.close()

        # write to hadoop
        #hdfs.mkdir(targetDirectory)
        hdfs.dump(fullText, targetPath)
#hdfs.cp(sourceFile, targetPath)

#print (hdfs.ls("test4"))
#files = hdfs.ls("test4")

# read from hadoop
#hdfs.get("test4/hello.txt", "/tmp/hello.txt")
#with open("/tmp/hello.txt") as f:
#	print f.read()

#print(hdfs.ls("test", "hduser1"))
#text = hdfs.load("test/hello.txt")
#print text
Example #11
 def get(self):
   src = self.hdfs_paths[0]
   dest = hdfs.path.split(self.local_paths[0])[-1]
   hdfs.dump(self.data, src)
   hdfs.get(src, dest)
   with open(dest) as fi:
     rdata = fi.read()
   self.assertEqual(rdata, self.data)
Example #12
 def renames(self):
     test_path = self.hdfs_paths[0]
     hdfs.dump(self.data, test_path)
     new_d = hdfs.path.join(self.hdfs_wd, "new_dir")
     new_path = hdfs.path.join(new_d, "new_p")
     hdfs.renames(test_path, new_path)
     self.assertFalse(hdfs.path.exists(test_path))
     self.assertTrue(hdfs.path.exists(new_path))
Example #13
 def renames(self):
     test_path = self.hdfs_paths[0]
     hdfs.dump(self.data, test_path, mode="wb")
     new_d = hdfs.path.join(self.hdfs_wd, "new_dir")
     new_path = hdfs.path.join(new_d, "new_p")
     hdfs.renames(test_path, new_path)
     self.assertFalse(hdfs.path.exists(test_path))
     self.assertTrue(hdfs.path.exists(new_path))
Example #14
 def get(self):
     src = self.hdfs_paths[0]
     dest = hdfs.path.split(self.local_paths[0])[-1]
     hdfs.dump(self.data, src, mode="wb")
     hdfs.get(src, dest, mode="wb")
     with open(dest, 'rb') as fi:
         rdata = fi.read()
     self.assertEqual(rdata, self.data)
Example #15
 def rename(self):
     test_path = self.hdfs_paths[0]
     new_path = "%s.new" % test_path
     hdfs.dump(self.data, test_path, mode="wb")
     hdfs.rename(test_path, new_path)
     self.assertFalse(hdfs.path.exists(test_path))
     self.assertTrue(hdfs.path.exists(new_path))
     self.assertRaises(RuntimeError, hdfs.rename, test_path,
                       self.local_paths[0])
Example #16
File: hadut.py Project: crs4/pydoop
 def set_exe(self, pipes_code):
     """
     Dump launcher code to the distributed file system.
     """
     if not self.output:
         raise RuntimeError("no output directory, can't create launcher")
     parent = hdfs.path.dirname(hdfs.path.abspath(self.output.rstrip("/")))
     self.exe = hdfs.path.join(parent, utils.make_random_str())
     hdfs.dump(pipes_code, self.exe)
Example #17
File: hadut.py Project: onlynone/pydoop
 def set_exe(self, pipes_code):
     """
     Dump launcher code to the distributed file system.
     """
     if not self.output:
         raise RuntimeError("no output directory, can't create launcher")
     parent = hdfs.path.dirname(hdfs.path.abspath(self.output.rstrip("/")))
     self.exe = hdfs.path.join(parent, utils.make_random_str())
     hdfs.dump(pipes_code, self.exe)
Example #18
 def __ls(self, ls_func, path_transform):
   for wd, paths in izip(
     (self.local_wd, self.hdfs_wd), (self.local_paths, self.hdfs_paths)
     ):
     for p in paths:
       hdfs.dump(self.data, p)
       self.assertEqual(path_transform(ls_func(p)[0]), p)
     dir_list = [path_transform(p) for p in ls_func(wd)]
     self.assertEqual(set(dir_list), set(paths))
Example #19
 def rename(self):
     test_path = self.hdfs_paths[0]
     new_path = "%s.new" % test_path
     hdfs.dump(self.data, test_path)
     hdfs.rename(test_path, new_path)
     self.assertFalse(hdfs.path.exists(test_path))
     self.assertTrue(hdfs.path.exists(new_path))
     self.assertRaises(
         RuntimeError, hdfs.rename, test_path, self.local_paths[0]
     )
Example #20
 def get_hosts(self):
     if hdfs.default_is_local():
         # only run on HDFS
         return
     hdfs.dump(self.data, self.hdfs_paths[0], mode="wb")
     fs = hdfs.hdfs("default", 0)
     hs = fs.get_hosts(self.hdfs_paths[0], 0, 10)
     self.assertTrue(len(hs) > 0)
     self.assertRaises(ValueError, fs.get_hosts, self.hdfs_paths[0], -10,
                       10)
     self.assertRaises(ValueError, fs.get_hosts, self.hdfs_paths[0], 0, -10)
Example #21
 def runTest(self):
     path = make_random_str() + UNI_CHR
     hdfs.dump("foo\n", path)
     st = hdfs.path.stat(path)
     atime, mtime = [getattr(st, 'st_%stime' % _) for _ in 'am']
     new_atime, new_mtime = atime + 100, mtime + 200
     hdfs.path.utime(path, (new_atime, new_mtime))
     st = hdfs.path.stat(path)
     self.assertEqual(st.st_atime, new_atime)
     self.assertEqual(st.st_mtime, new_mtime)
     hdfs.rmr(path)
Example #22
File: test_hdfs.py Project: xuande/pydoop
 def __make_tree(self, wd):
     d1 = "%s/d1" % wd
     t1 = FSTree(d1)
     d2 = "%s/d2" % d1
     t2 = t1.add(d2)
     hdfs.mkdir(d2)
     for t, d, bn in ((t1, d1, "f1"), (t2, d2, "f2")):
         f = "%s/%s" % (d, bn)
         hdfs.dump(self.data, f)
         t.add(f, 0)
     return t1
Example #23
 def __make_tree(self, wd):
   d1 = "%s/d1" % wd
   t1 = FSTree(d1)
   d2 = "%s/d2" % d1
   t2 = t1.add(d2)
   hdfs.mkdir(d2)
   for t, d, bn in ((t1, d1, "f1"), (t2, d2, "f2")):
     f = "%s/%s" % (d, bn)
     hdfs.dump(self.data, f)
     t.add(f, 0)
   return t1
Example #24
def run_phase_two(args, mappers, input_, logger):
    launcher_text = generate_launcher("bl.core.gt.mr.kinship.phase_two")
    launcher_name, mr_out_dir = random_str(args), random_str(args)
    logger.debug("launcher_name: %r" % (launcher_name, ))
    logger.debug("mr_out_dir: %r" % (mr_out_dir, ))
    hdfs.dump(launcher_text, launcher_name)
    mr_conf = MR_CONF.copy()
    mr_conf["mapred.map.tasks"] = mappers
    mr_conf["mapred.job.name"] = "kinship_phase_two"
    hadut.run_pipes(launcher_name, input_, mr_out_dir, properties=mr_conf)
    return mr_out_dir
Example #25
 def runTest(self):
     path = make_random_str() + UNI_CHR
     hdfs.dump("foo\n", path)
     st = hdfs.path.stat(path)
     atime, mtime = [getattr(st, 'st_%stime' % _) for _ in 'am']
     new_atime, new_mtime = atime + 100, mtime + 200
     hdfs.path.utime(path, (new_atime, new_mtime))
     st = hdfs.path.stat(path)
     self.assertEqual(st.st_atime, new_atime)
     self.assertEqual(st.st_mtime, new_mtime)
     hdfs.rmr(path)
Example #26
 def get_hosts(self):
     if hdfs.default_is_local():
         # only run on HDFS
         return
     hdfs.dump(self.data, self.hdfs_paths[0])
     fs = hdfs.hdfs("default", 0)
     hs = fs.get_hosts(self.hdfs_paths[0], 0, 10)
     self.assertTrue(len(hs) > 0)
     self.assertRaises(
         ValueError, fs.get_hosts, self.hdfs_paths[0], -10, 10
     )
     self.assertRaises(ValueError, fs.get_hosts, self.hdfs_paths[0], 0, -10)
Example #27
 def create(self, path, mode, fi=None):
     '''Open a nonexistent file. This will just create a new file and generate a
     new filehandle for you.'''
     mode = mode & 1
     maxfh = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
     if len(self.filehandles) == maxfh:
         raise FuseOSError(errno.EMFILE)
     while self.fhmax in self.filehandles.keys():
          self.fhmax = (self.fhmax + 1) % maxfh
      hdfs.dump('', path)
     self.filehandles[self.fhmax] = self.hdfs.open_file(path, mode)
     return self.fhmax
Example #28
 def __cp_file(self, wd):
     fn = "%s/fn" % wd
     hdfs.dump(self.data, fn, mode="wb")
     dest_dir = "%s/dest_dir" % wd
     hdfs.mkdir(dest_dir)
     fn_copy_on_wd = "%s/fn_copy" % wd
     hdfs.cp(fn, fn_copy_on_wd, mode="wb")
     self.assertEqual(hdfs.load(fn_copy_on_wd), self.data)
     self.assertRaises(IOError, hdfs.cp, fn, fn_copy_on_wd)
     fn_copy_on_dest_dir = "%s/fn" % dest_dir
     hdfs.cp(fn, dest_dir, mode="wb")
     self.assertEqual(hdfs.load(fn_copy_on_dest_dir), self.data)
     self.assertRaises(IOError, hdfs.cp, fn, dest_dir)
Example #29
 def __cp_file(self, wd):
   fn = "%s/fn" % wd
   hdfs.dump(self.data, fn)
   dest_dir = "%s/dest_dir" % wd
   hdfs.mkdir(dest_dir)
   fn_copy_on_wd = "%s/fn_copy" % wd
   hdfs.cp(fn, fn_copy_on_wd)
   self.assertEqual(hdfs.load(fn_copy_on_wd), self.data)
   self.assertRaises(IOError, hdfs.cp, fn, fn_copy_on_wd)
   fn_copy_on_dest_dir = "%s/fn" % dest_dir
   hdfs.cp(fn, dest_dir)
   self.assertEqual(hdfs.load(fn_copy_on_dest_dir), self.data)
   self.assertRaises(IOError, hdfs.cp, fn, dest_dir)
Example #30
 def test_kind(self):
     for path in self.path, self.u_path:
         self.assertTrue(hdfs.path.kind(path) is None)
         try:
             hdfs.dump("foo\n", path)
             self.assertEqual('file', hdfs.path.kind(path))
             hdfs.rmr(path)
             hdfs.mkdir(path)
             self.assertEqual('directory', hdfs.path.kind(path))
         finally:
             try:
                 hdfs.rmr(path)
             except IOError:
                 pass
Example #31
 def test_kind(self):
   path = utils.make_random_str()
   self.assertTrue(hdfs.path.kind(path) is None)
   try:
     hdfs.dump("foo\n", path)
     self.assertEqual('file', hdfs.path.kind(path))
     hdfs.rmr(path)
     hdfs.mkdir(path)
     self.assertEqual('directory', hdfs.path.kind(path))
   finally:
     try:
       hdfs.rmr(path)
     except IOError:
       pass
Example #32
 def test_isdir(self):
   path = utils.make_random_str()
   self.assertFalse(hdfs.path.isdir(path))
   try:
     hdfs.dump("foo\n", path)
     self.assertFalse(hdfs.path.isdir(path))
     hdfs.rmr(path)
     hdfs.mkdir(path)
     self.assertTrue(hdfs.path.isdir(path))
   finally:
     try:
       hdfs.rmr(path)
     except IOError:
       pass
Example #33
 def test_isdir(self):
     for path in self.path, self.u_path:
         self.assertFalse(hdfs.path.isdir(path))
         try:
             hdfs.dump("foo\n", path)
             self.assertFalse(hdfs.path.isdir(path))
             hdfs.rmr(path)
             hdfs.mkdir(path)
             self.assertTrue(hdfs.path.isdir(path))
         finally:
             try:
                 hdfs.rmr(path)
             except IOError:
                 pass
Example #34
def lowercase():
    f = request.files['file']
    f.save(
        os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(f.filename)))
    with open(f.filename, 'r') as fopen:
        hdfs.dump(fopen.read(), '/user/input_lowercase/text')
    os.system(
        "pydoop script --num-reducers 0 -t '' lowercase.py /user/input_lowercase /user/output_lowercase"
    )
    list_files = hdfs.hdfs().list_directory('/user/output_lowercase')
    return json.dumps([
        hdfs.load(file['name'], mode='rt') for file in list_files
        if 'SUCCESS' not in file['name']
    ])
Example #35
 def chown(self):
     new_user = '******'
     test_path = self.hdfs_paths[0]
     hdfs.dump(self.data, test_path)
     hdfs.chown(test_path, user=new_user)
     path_info = hdfs.lsl(test_path)[0]
     self.assertEqual(path_info['owner'], new_user)
     prev_owner = path_info['owner']
     prev_grp = path_info['group']
     # owner and group should remain unchanged
     hdfs.chown(test_path, user='', group='')
     path_info = hdfs.lsl(test_path)[0]
     self.assertEqual(path_info['owner'], prev_owner)
     self.assertEqual(path_info['group'], prev_grp)
Example #36
 def test_isdir(self):
   path = utils.make_random_str()
   self.assertFalse(hdfs.path.isdir(path))
   try:
     hdfs.dump("foo\n", path)
     self.assertFalse(hdfs.path.isdir(path))
     hdfs.rmr(path)
     hdfs.mkdir(path)
     self.assertTrue(hdfs.path.isdir(path))
   finally:
     try:
       hdfs.rmr(path)
     except IOError:
       pass
Example #37
 def test_kind(self):
   path = utils.make_random_str()
   self.assertTrue(hdfs.path.kind(path) is None)
   try:
     hdfs.dump("foo\n", path)
     self.assertEqual('file', hdfs.path.kind(path))
     hdfs.rmr(path)
     hdfs.mkdir(path)
     self.assertEqual('directory', hdfs.path.kind(path))
   finally:
     try:
       hdfs.rmr(path)
     except IOError:
       pass
Example #38
def wordcount():
    f = request.files['file']
    f.save(
        os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(f.filename)))
    with open(f.filename, 'r') as fopen:
        hdfs.dump(fopen.read(), '/user/input_wordcount/text')
    os.system(
        'pydoop script -c combiner wordcount.py /user/input_wordcount /user/output_wordcount'
    )
    list_files = hdfs.hdfs().list_directory('/user/output_wordcount')
    return json.dumps([
        hdfs.load(file['name'], mode='rt') for file in list_files
        if 'SUCCESS' not in file['name']
    ])
Example #39
 def chown(self):
     new_user = '******'
     test_path = self.hdfs_paths[0]
     hdfs.dump(self.data, test_path, mode="wb")
     hdfs.chown(test_path, user=new_user)
     path_info = hdfs.lsl(test_path)[0]
     self.assertEqual(path_info['owner'], new_user)
     prev_owner = path_info['owner']
     prev_grp = path_info['group']
     # owner and group should remain unchanged
     hdfs.chown(test_path, user='', group='')
     path_info = hdfs.lsl(test_path)[0]
     self.assertEqual(path_info['owner'], prev_owner)
     self.assertEqual(path_info['group'], prev_grp)
Example #40
 def test_kind(self):
     for path in self.path, self.u_path:
         self.assertTrue(hdfs.path.kind(path) is None)
         try:
             hdfs.dump("foo\n", path)
             self.assertEqual('file', hdfs.path.kind(path))
             hdfs.rmr(path)
             hdfs.mkdir(path)
             self.assertEqual('directory', hdfs.path.kind(path))
         finally:
             try:
                 hdfs.rmr(path)
             except IOError:
                 pass
Example #41
 def test_isdir(self):
     for path in self.path, self.u_path:
         self.assertFalse(hdfs.path.isdir(path))
         try:
             hdfs.dump("foo\n", path)
             self.assertFalse(hdfs.path.isdir(path))
             hdfs.rmr(path)
             hdfs.mkdir(path)
             self.assertTrue(hdfs.path.isdir(path))
         finally:
             try:
                 hdfs.rmr(path)
             except IOError:
                 pass
Example #42
 def __ls(self, ls_func, path_transform):
   for wd, paths in izip(
     (self.local_wd, self.hdfs_wd), (self.local_paths, self.hdfs_paths)
     ):
     for p in paths:
       hdfs.dump(self.data, p)
     test_dir = "%s/%s" % (wd, "test_dir")
     test_path = "%s/%s" % (test_dir, "test_path")
     hdfs.dump(self.data, test_path)
     paths.append(test_dir)
     for recursive in False, True:
       if recursive:
         paths.append(test_path)
       dir_list = [path_transform(p) for p in ls_func(wd, recursive=recursive)]
       self.assertEqual(sorted(dir_list), sorted(paths))
Example #43
 def __ls(self, ls_func, path_transform):
     for wd, paths in czip((self.local_wd, self.hdfs_wd),
                           (self.local_paths, self.hdfs_paths)):
         for p in paths:
             hdfs.dump(self.data, p, mode="wb")
         test_dir = "%s/%s" % (wd, "test_dir")
         test_path = "%s/%s" % (test_dir, "test_path")
         hdfs.dump(self.data, test_path, mode="wb")
         paths.append(test_dir)
         for recursive in False, True:
             if recursive:
                 paths.append(test_path)
             dir_list = [
                 path_transform(p) for p in ls_func(wd, recursive=recursive)
             ]
             self.assertEqual(sorted(dir_list), sorted(paths))
Example #44
File: test_hdfs.py Project: crs4/pydoop
 def __make_tree(self, wd, root="d1", create=True):
     """
     d1
     |-- d2
     |   `-- f2
     `-- f1
     """
     d1 = "%s/%s" % (wd, root)
     t1 = FSTree(d1)
     d2 = "%s/d2" % d1
     t2 = t1.add(d2)
     if create:
         hdfs.mkdir(d2)
     for t, d, bn in ((t1, d1, "f1"), (t2, d2, "f2")):
         f = "%s/%s" % (d, bn)
         if create:
             hdfs.dump(self.data, f, mode="wb")
         t.add(f, 0)
     return t1
Example #45
 def __make_tree(self, wd, root="d1", create=True):
     """
     d1
     |-- d2
     |   `-- f2
     `-- f1
     """
     d1 = "%s/%s" % (wd, root)
     t1 = FSTree(d1)
     d2 = "%s/d2" % d1
     t2 = t1.add(d2)
     if create:
         hdfs.mkdir(d2)
     for t, d, bn in ((t1, d1, "f1"), (t2, d2, "f2")):
         f = "%s/%s" % (d, bn)
         if create:
             hdfs.dump(self.data, f, mode="wb")
         t.add(f, 0)
     return t1
Example #46
File: hdfs.py Project: nihil0/hops-util-py
def dump(data, hdfs_path):
    """
    Dumps data to a file

    Args:
        :data: data to write to hdfs_path
        :hdfs_path: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS).
    """

    hdfs_path = _expand_path(hdfs_path, exists=False)
    return hdfs.dump(data, hdfs_path)
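
A brief usage sketch for this wrapper, assuming the hops-util-py convention that a relative path is resolved against the project's HDFS directory (both paths below are hypothetical):

from hops import hdfs as hopsfs  # assumption: the wrapper module is exposed as hops.hdfs

# relative path, expanded under the project's HDFS directory
hopsfs.dump("hello from hops\n", "Resources/example.txt")

# an absolute HDFS path is also accepted
hopsfs.dump("hello from hops\n", "/Projects/demo/Resources/example.txt")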
Example #47
    def __setup_remote_paths(self):
        """
        Actually create the working directory and copy the module into it.

        Note: the script has to be readable by Hadoop; though this may not
        generally be a problem on HDFS, where the Hadoop user is usually
        the superuser, things may be different if our working directory is
        on a shared POSIX filesystem.  Therefore, we make the directory
        and the script accessible by all.
        """
        pipes_code = self.__generate_pipes_code()
        hdfs.mkdir(self.remote_wd)
        hdfs.chmod(self.remote_wd, "a+rx")
        hdfs.dump(pipes_code, self.remote_exe)
        hdfs.chmod(self.remote_exe, "a+rx")
        hdfs.put(self.args.module, self.remote_module)
        hdfs.chmod(self.remote_module, "a+r")
        self.__warn_user_if_wd_maybe_unreadable(self.remote_wd)
        self.logger.debug("Created remote paths:")
        self.logger.debug(self.remote_wd)
        self.logger.debug(self.remote_exe)
        self.logger.debug(self.remote_module)
Example #48
File: script.py Project: ilveroluca/pydoop
  def __setup_remote_paths(self):
    """
    Actually create the working directory and copy the module into it.

    Note: the script has to be readable by Hadoop; though this may not
    generally be a problem on HDFS, where the Hadoop user is usually
    the superuser, things may be different if our working directory is
    on a shared POSIX filesystem.  Therefore, we make the directory
    and the script accessible by all.
    """
    pipes_code = self.__generate_pipes_code()
    hdfs.mkdir(self.remote_wd)
    hdfs.chmod(self.remote_wd, "a+rx")
    hdfs.dump(pipes_code, self.remote_exe)
    hdfs.chmod(self.remote_exe, "a+rx")
    hdfs.put(self.args.module, self.remote_module)
    hdfs.chmod(self.remote_module, "a+r")
    self.__warn_user_if_wd_maybe_unreadable(self.remote_wd)
    self.logger.debug("Created remote paths:")
    self.logger.debug(self.remote_wd)
    self.logger.debug(self.remote_exe)
    self.logger.debug(self.remote_module)
Example #49
def dump(data, hdfs_path):
    """
    Dumps data to a file

    Args:
        :data: data to write to hdfs_path
        :hdfs_path: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS).
    """

    #split = hdfs_path.split('/')
    #filename = split[len(split) - 1]
    #directory = "/".join(split[0:len(split)-1])
    hdfs_path = _expand_path(hdfs_path, exists=False)
    return hdfs.dump(data, hdfs_path)
Example #50
 def samefile_rel(self):
     p = make_random_str() + UNI_CHR
     hdfs.dump("foo\n", p)
     self.assertTrue(hdfs.path.samefile(p, hdfs.path.abspath(p)))
     hdfs.rmr(p)
Example #51
 def setUp(self):
     self.path = make_random_str() + UNI_CHR
     hdfs.dump("foo\n", self.path)
Example #52
 def samefile_rel(self):
     p = make_random_str() + UNI_CHR
     hdfs.dump("foo\n", p)
     self.assertTrue(hdfs.path.samefile(p, hdfs.path.abspath(p)))
     hdfs.rmr(p)
Example #53
 def good(self):
   path = utils.make_random_str()
   hdfs.dump("foo\n", path)
   self.assertTrue(hdfs.path.exists(path))
   hdfs.rmr(path)
   self.assertFalse(hdfs.path.exists(path))
Example #54
 def load(self):
   for test_path in self.hdfs_paths[0], self.local_paths[0]:
     hdfs.dump(self.data, test_path)
     rdata = hdfs.load(test_path)
     self.assertEqual(rdata, self.data)
Example #55
File: HdfsIO.py Project: yuyiguo/WMArchive
 def dump(self, data, fname):
     "Dump given data directly to HDFS"
     hdfs.dump(data, fname)
Example #56
import os
import itertools

import swiftclient
import pydoop.hdfs as hdfs

container = 'w251-enron'
prefix = 'clean_v2'
hdfs_prefix = '/enron'

authurl = os.environ['SWIFT_AUTH_URL']
user = os.environ['SWIFT_USER']
key = os.environ['SWIFT_KEY']

conn = swiftclient.client.Connection(
        authurl=authurl, user=user, key=key)

header, objects = conn.get_container(container, prefix=prefix, full_listing=True)

hdfs.mkdir(hdfs_prefix)

total = len(objects)
count = 1
for obj in objects:
    name = obj['name']
    print('Downloading %s (%d of %d)' % (name, count, total))
    header, contents = conn.get_object(container, name)
    filename = name.replace('/', '_')
    hdfs.dump(contents, '%s/%s' % (hdfs_prefix, filename))
    count += 1
Example #57
 def setUp(self):
     self.path = make_random_str() + UNI_CHR
     hdfs.dump("foo\n", self.path)
Example #58
 def load(self):
     for test_path in self.hdfs_paths[0], self.local_paths[0]:
         hdfs.dump(self.data, test_path, mode="wb")
         rdata = hdfs.load(test_path)
         self.assertEqual(rdata, self.data)