Esempio n. 1
0
 def _gff_process(self, gff_files, limit_info, target_lines=None):
     """Process GFF addition, using Disco to parallelize the process.

     Args:
         gff_files: iterable of file name strings. A name whose text before
             the first ":" is "disco" is handed to the job unchanged; every
             other name is converted to an absolute path first.
         limit_info: forwarded unchanged to the job as the ``limit_info``
             parameter.
         target_lines: must be None; splitting the output is not supported
             when the work is run as a parallel job.

     Yields:
         Exactly one dict, mapping each key produced by the job to its
         JSON-decoded value.

     Raises:
         AssertionError: if ``target_lines`` is not None. This function is a
             generator, so the check runs when iteration starts, not when
             the function is called.
     """
     # Raise explicitly instead of using an ``assert`` statement: asserts are
     # removed under ``python -O``, which would silently ignore target_lines.
     # The exception type and message are the same as before.
     if target_lines is not None:
         raise AssertionError("Cannot split parallelized jobs")
     # make these imports local; only need them when using disco
     import simplejson
     import disco
     # absolute path names unless they are special disco files
     full_files = [(os.path.abspath(f) if f.split(":")[0] != "disco" else f)
                   for f in gff_files]
     results = disco.job(
         self._disco_host,
         name="gff_reader",
         input=full_files,
         params=disco.Params(limit_info=limit_info,
                             jsonify=True,
                             filter_info=self._examiner._filter_info),
         required_modules=["simplejson", "collections", "re"],
         map=self._map_fn,
         reduce=self._reduce_fn)
     # job values arrive JSON-encoded (jsonify=True above); decode each one
     processed = dict()
     for out_key, out_val in disco.result_iterator(results):
         processed[out_key] = simplejson.loads(out_val)
     yield processed
Esempio n. 2
0
 def _disco_process(self, gff_files, limit_info):
     """Run the given GFF files through a Disco job and collect its output.

     Args:
         gff_files: iterable of file name strings.
         limit_info: forwarded unchanged to the job as the ``limit_info``
             parameter.

     Returns:
         A dict with one entry per (key, value) pair produced by the job;
         each value is JSON-decoded before it is stored.
     """
     # imported here so disco and simplejson are only needed on this path
     import simplejson
     import disco

     # Names whose text before the first ":" is "disco" are passed through
     # untouched; every other name is converted to an absolute path.
     job_inputs = []
     for name in gff_files:
         if name.partition(":")[0] == "disco":
             job_inputs.append(name)
         else:
             job_inputs.append(os.path.abspath(name))

     job_output = disco.job(
         self._disco_host,
         name="gff_reader",
         input=job_inputs,
         params=disco.Params(limit_info=limit_info,
                             jsonify=True,
                             filter_info=self._filter_info),
         required_modules=["simplejson", "collections", "re"],
         map=self._map_fn,
         reduce=self._reduce_fn)

     # decode every JSON-encoded value while building the result mapping
     return dict((key, simplejson.loads(raw))
                 for key, raw in disco.result_iterator(job_output))