Beispiel #1
0
 def map_side_join(
         self,
         right: Native,
         key: UniKey,
         how: How = JoinType.Left,
         right_is_uniq: bool = True,
         inplace: bool = False,
 ) -> Optional[Native]:
     """Join the items of this stream with the items of ``right``.

     The work is delegated to ``algo.map_side_join``; this method only
     prepares its arguments and wraps the result.

     :param right: stream whose ``get_items()`` supplies the right side.
     :param key: join key; normalised through ``arg.get_names`` and
         ``arg.update`` before a composite key function is built from it.
     :param how: a ``JoinType`` member, or any value accepted by the
         ``JoinType`` constructor.
     :param right_is_uniq: forwarded to the algorithm as ``uniq_right``.
     :param inplace: when true, the joined items are stored on this stream
         and ``None`` is returned; otherwise a new stream is returned.
     :return: the joined stream, or ``None`` for an in-place join.
     """
     join_keys = arg.update([arg.get_names(key)])
     join_type = how if isinstance(how, JoinType) else JoinType(how)

     left_items = self.get_items()
     right_items = right.get_items()
     merged = algo.map_side_join(
         iter_left=left_items,
         iter_right=right_items,
         key_function=fs.composite_key(join_keys),
         merge_function=fs.merge_two_items(),
         dict_function=fs.items_to_dict(),
         how=join_type,
         uniq_right=right_is_uniq,
     )
     # An in-memory stream gets a materialised list instead of a lazy result.
     if self.is_in_memory():
         merged = list(merged)

     if not inplace:
         result = self.stream(merged)
         static_meta = self.get_compatible_static_meta()
         result = result.set_meta(**static_meta)
         return self._assume_native(result)

     # NOTE(review): the current count is reused for the joined items;
     # confirm this holds for join types that can add or drop rows.
     self.set_items(merged, count=self.get_count(), inplace=True)
     return None
Beispiel #2
0
 def disk_sort(
         self,
         key: UniKey = fs.same(),
         reverse: bool = False,
         step: AutoCount = AUTO,
         verbose: AutoBool = False,
 ) -> Native:
     """Sort the stream by merging parts that were sorted separately.

     The stream is split with ``split_to_disk_by_step`` (each part sorted by
     the key function), then the parts are combined by ``algo.merge_iter``.

     :param key: sort key; turned into a function by ``fs.composite_key``.
     :param reverse: forwarded to both the per-part sort and the merge.
     :param step: part size; resolved through ``Auto.delayed_acquire`` with
         ``self.get_limit_items_in_memory`` as the delayed source.
     :param verbose: forwarded to the split step and to the log call.
     :return: a stream over the merged items whose count is the sum of the
         part counts (a part without a count contributes 0).
     """
     step = Auto.delayed_acquire(step, self.get_limit_items_in_memory)
     sort_key = fs.composite_key(key)
     stream_parts = self.split_to_disk_by_step(
         step=step, sort_each_by=sort_key, reverse=reverse, verbose=verbose,
     )
     # NOTE(review): this guard disappears under ``python -O``.
     assert stream_parts, 'streams must be non-empty'

     # Two separate passes: every iterator is obtained before any count is read.
     part_iterators = [part.get_iter() for part in stream_parts]
     total_count = sum(part.get_count() or 0 for part in stream_parts)

     message = 'Merging {} parts... '.format(len(part_iterators))
     self.log(message, verbose=verbose)

     merged_items = algo.merge_iter(
         part_iterators,
         key_function=sort_key,
         reverse=reverse,
         # presumably cleans up the temporary part files once merging is done
         post_action=self.get_tmp_files().remove_all,
     )
     return self._assume_native(self.stream(merged_items, count=total_count))
Beispiel #3
0
 def memory_sort(
         self,
         key: UniKey = fs.same(),
         reverse: bool = False,
         verbose: AutoBool = False,
 ) -> Native:
     """Sort all items of the stream at once and return them as a new stream.

     :param key: sort key; turned into a function by ``fs.composite_key``.
     :param reverse: sort in descending order when true.
     :param verbose: forwarded to the progress log calls.
     :return: a stream built from the sorted list.

     Side effect: ``self._count`` is set to the number of sorted items.
     """
     sort_key = fs.composite_key(key)
     source_items = self.get_list()
     self.log(
         'Sorting {} items in memory...'.format(len(source_items)),
         end='\r',
         verbose=verbose,
     )

     # Sort a copy so that the list returned by get_list() is left untouched.
     ordered = list(source_items)
     ordered.sort(key=sort_key, reverse=reverse)

     self.log('Sorting has been finished.', end='\r', verbose=verbose)
     self._count = len(ordered)
     return self._assume_native(self.stream(ordered))
Beispiel #4
0
 def sort(
         self,
         *keys,
         reverse: bool = False,
         step: AutoCount = AUTO,
         verbose: AutoBool = True,
 ) -> Native:
     """Sort the stream, choosing between ``memory_sort`` and ``disk_sort``.

     :param keys: sort keys, normalised through ``update``; with no keys
         ``fs.same()`` is used as the key function.
     :param reverse: forwarded to the chosen sort method.
     :param step: resolved through ``Auto.delayed_acquire`` with
         ``self.get_limit_items_in_memory`` as the delayed source; passed on
         to ``disk_sort`` when that path is taken.
     :param verbose: forwarded to the chosen sort method.
     :return: the sorted stream.
     """
     keys = update(keys)
     step = Auto.delayed_acquire(step, self.get_limit_items_in_memory)
     sort_key = fs.same() if len(keys) == 0 else fs.composite_key(keys)

     # The in-memory path is taken when the stream fits, or when step is None.
     use_memory = self.can_be_in_memory(step=step) or step is None
     if use_memory:
         ordered = self.memory_sort(sort_key, reverse=reverse, verbose=verbose)
         return self._assume_native(ordered)

     ordered = self.disk_sort(
         sort_key, reverse=reverse, step=step, verbose=verbose,
     )
     return self._assume_native(ordered)
Beispiel #5
0
 def sorted_join(
         self,
         right: Native,
         key: UniKey,
         how: How = JoinType.Left,
         sorting_is_reversed: bool = False,
 ) -> Native:
     """Join this stream with ``right`` using ``algo.sorted_join``.

     NOTE(review): both inputs are presumably expected to be already sorted
     by ``key`` in the direction given by ``sorting_is_reversed`` - confirm.

     :param right: stream whose ``get_iter()`` supplies the right side.
     :param key: join key; normalised through ``update`` before a composite
         key function is built from it.
     :param how: a ``JoinType`` member, or any value accepted by the
         ``JoinType`` constructor.
     :param sorting_is_reversed: passed as ``reverse`` to ``bf.is_ordered``,
         which provides the order-checking function.
     :return: a new stream carrying this stream's static meta.
     """
     join_keys = update([key])
     join_type = how if isinstance(how, JoinType) else JoinType(how)

     left_iter = self.get_iter()
     right_iter = right.get_iter()
     order_check = bf.is_ordered
     merged = algo.sorted_join(
         iter_left=left_iter,
         iter_right=right_iter,
         key_function=fs.composite_key(join_keys),
         merge_function=fs.merge_two_items(),
         order_function=order_check(
             reverse=sorting_is_reversed, including=True,
         ),
         how=join_type,
     )
     # An in-memory stream gets a materialised list instead of a lazy result.
     if self.is_in_memory():
         merged = list(merged)

     static_meta = self.get_static_meta()
     return self._assume_native(self.stream(merged, **static_meta))
Beispiel #6
0
 def map_side_join(
         self,
         right: Native,
         key,
         how: How = JoinType.Left,
         right_is_uniq: bool = True,
         inplace: bool = False,
 ) -> Native:
     """Join the items of this stream with the items of ``right``.

     :param right: stream whose ``get_items()`` supplies the right side.
     :param key: join key; normalised through ``get_names`` and ``update``
         before a composite key function is built from it.
     :param how: a ``JoinType`` member, or any value accepted by the
         ``JoinType`` constructor.
     :param right_is_uniq: forwarded to the algorithm as ``uniq_right``.
     :param inplace: forwarded unchanged to ``set_items``.
     :return: whatever ``set_items`` returns for the given ``inplace`` flag.
     """
     join_keys = update([get_names(key)])
     join_type = how if isinstance(how, JoinType) else JoinType(how)

     left_items = self.get_items()
     right_items = right.get_items()
     # The bare name below is the map_side_join function from the enclosing
     # module scope, not this method (class scope is not searched here).
     merged = map_side_join(
         iter_left=left_items,
         iter_right=right_items,
         key_function=fs.composite_key(join_keys),
         merge_function=fs.merge_two_items(),
         dict_function=fs.items_to_dict(),
         how=join_type,
         uniq_right=right_is_uniq,
     )
     # An in-memory stream gets a materialised list instead of a lazy result.
     if self.is_in_memory():
         merged = list(merged)
     return self.set_items(merged, inplace=inplace)