Example #1
0
 def _job_completed(self, jobid):
     if jobid == JobType.SCAN:
         self._results_changed()
         fs.filesdb.commit()
         if not self.results.groups:
             self.view.show_message(tr("No duplicates found."))
         else:
             self.view.show_results_window()
     if jobid in {JobType.MOVE, JobType.DELETE}:
         self._results_changed()
     if jobid == JobType.LOAD:
         self._recreate_result_table()
         self._results_changed()
         self.view.show_results_window()
     if jobid in {JobType.COPY, JobType.MOVE, JobType.DELETE}:
         if self.results.problems:
             self.problem_dialog.refresh()
             self.view.show_problem_dialog()
         else:
             if jobid == JobType.COPY:
                 msg = tr("All marked files were copied successfully.")
             elif jobid == JobType.MOVE:
                 msg = tr("All marked files were moved successfully.")
             elif jobid == JobType.DELETE and self.deletion_options.direct:
                 msg = tr("All marked files were deleted successfully.")
             else:
                 msg = tr(
                     "All marked files were successfully sent to Trash.")
             self.view.show_message(msg)
Example #2
0
 def copy_or_move_marked(self, copy):
     """Start an async move (or copy) job on marked duplicates.
     
     :param bool copy: If True, duplicates will be copied instead of moved
     """
     def do(j):
         def op(dupe):
             j.add_progress()
             self.copy_or_move(dupe, copy, destination, desttype)
         
         j.start_job(self.results.mark_count)
         self.results.perform_on_marked(op, not copy)
     
     if not self._check_demo():
         return
     if not self.results.mark_count:
         self.view.show_message(MSG_NO_MARKED_DUPES)
         return
     opname = tr("copy") if copy else tr("move")
     prompt = tr("Select a directory to {} marked files to").format(opname)
     destination = self.view.select_dest_folder(prompt)
     if destination:
         desttype = self.options['copymove_dest_type']
         jobid = JobType.Copy if copy else JobType.Move
         self._start_job(jobid, do)
Example #3
0
 def load(self):
     schedule = self.schedule
     txn = schedule.ref
     self._start_date = txn.date
     self._start_date_fmt = self.table.document.app.format_date(self._start_date)
     self._stop_date = schedule.stop_date
     self._stop_date_fmt = self.table.document.app.format_date(self._stop_date) if self._stop_date is not None else ''
     self._repeat_type = schedule.repeat_type_desc
     self._interval = str(schedule.repeat_every)
     self._description = txn.description
     self._payee = txn.payee
     self._checkno = txn.checkno
     splits = txn.splits
     froms, tos = txn.splitted_splits()
     self._from_count = len(froms)
     self._to_count = len(tos)
     UNASSIGNED = tr('Unassigned') if len(froms) > 1 else ''
     self._from = ', '.join(s.account.name if s.account is not None else UNASSIGNED for s in froms)
     UNASSIGNED = tr('Unassigned') if len(tos) > 1 else ''
     self._to = ', '.join(s.account.name if s.account is not None else UNASSIGNED for s in tos)
     try:
         self._amount = sum(s.amount for s in tos)
     except ValueError: # currency coercing problem
         currency = self.document.default_currency
         self._amount = sum(convert_amount(s.amount, currency, s.transaction.date) for s in tos)
     self._amount_fmt = self.document.format_amount(self._amount)
Example #4
0
 def load(self):
     transaction = self.transaction
     self._load_from_fields(transaction, self.FIELDS)
     self._date_fmt = None
     self._position = transaction.position
     splits = transaction.splits
     froms, tos = self.transaction.splitted_splits()
     self._from_count = len(froms)
     self._to_count = len(tos)
     UNASSIGNED = tr('Unassigned') if len(froms) > 1 else ''
     get_display = lambda s: s.account.combined_display if s.account is not None else UNASSIGNED
     self._from = ', '.join(map(get_display, froms))
     UNASSIGNED = tr('Unassigned') if len(tos) > 1 else ''
     get_display = lambda s: s.account.combined_display if s.account is not None else UNASSIGNED
     self._to = ', '.join(map(get_display, tos))
     self._amount = transaction.amount
     self._amount_fmt = None
     self._mtime = datetime.datetime.fromtimestamp(transaction.mtime)
     if transaction.mtime > 0:
         self._mtime_fmt = self._mtime.strftime('%Y/%m/%d %H:%M')
     else:
         self._mtime_fmt = ''
     self._recurrent = isinstance(transaction, Spawn)
     self._reconciled = any(split.reconciled for split in splits)
     self._is_budget = getattr(transaction, 'is_budget', False)
     self._can_set_amount = transaction.can_set_amount
Example #5
0
 def _getmatches(self, files, j):
     if self.size_threshold or self.scan_type in {ScanType.Contents, ScanType.Folders}:
         j = j.start_subjob([2, 8])
         for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
             f.size  # pre-read; makes for smoother progress if read here (especially for bundles)
         if self.size_threshold:
             files = [f for f in files if f.size >= self.size_threshold]
     if self.scan_type in {ScanType.Contents, ScanType.Folders}:
         return engine.getmatches_by_contents(files, j=j)
     else:
         j = j.start_subjob([2, 8])
         kw = {}
         kw['match_similar_words'] = self.match_similar_words
         kw['weight_words'] = self.word_weighting
         kw['min_match_percentage'] = self.min_match_percentage
         if self.scan_type == ScanType.FieldsNoOrder:
             self.scan_type = ScanType.Fields
             kw['no_field_order'] = True
         func = {
             ScanType.Filename: lambda f: engine.getwords(rem_file_ext(f.name)),
             ScanType.Fields: lambda f: engine.getfields(rem_file_ext(f.name)),
             ScanType.Tag: lambda f: [
                 engine.getwords(str(getattr(f, attrname)))
                 for attrname in SCANNABLE_TAGS
                 if attrname in self.scanned_tags
             ],
         }[self.scan_type]
         for f in j.iter_with_progress(files, tr("Read metadata of %d/%d files")):
             logging.debug("Reading metadata of %s", f.path)
             f.words = func(f)
         return engine.getmatches(files, j=j, **kw)
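A small illustration (an editorial addition, not part of the source) of the word-extraction dispatch above, assuming getwords() splits a name into lowercase words and getfields() additionally groups them on the " - " separator:

name = rem_file_ext('The Beatles - Let It Be.mp3')   # -> 'The Beatles - Let It Be'
print(engine.getwords(name))    # assumed output: ['the', 'beatles', 'let', 'it', 'be']
print(engine.getfields(name))   # assumed output: [['the', 'beatles'], ['let', 'it', 'be']]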
Example #6
0
 def load(self):
     transaction = self.transaction
     self._load_from_fields(transaction, self.FIELDS)
     self._date_fmt = None
     self._position = transaction.position
     splits = transaction.splits
     froms, tos = self.transaction.splitted_splits()
     self._from_count = len(froms)
     self._to_count = len(tos)
     UNASSIGNED = tr('Unassigned') if len(froms) > 1 else ''
     get_display = lambda s: s.account.combined_display if s.account is not None else UNASSIGNED
     self._from = ', '.join(map(get_display, froms))
     UNASSIGNED = tr('Unassigned') if len(tos) > 1 else ''
     get_display = lambda s: s.account.combined_display if s.account is not None else UNASSIGNED
     self._to = ', '.join(map(get_display, tos))
     self._amount = transaction.amount
     self._amount_fmt = None
     self._mtime = datetime.datetime.fromtimestamp(transaction.mtime)
     if transaction.mtime > 0:
         self._mtime_fmt = self._mtime.strftime('%Y/%m/%d %H:%M')
     else:
         self._mtime_fmt = ''
     self._recurrent = isinstance(transaction, Spawn)
     self._reconciled = any(split.reconciled for split in splits)
     self._is_budget = getattr(transaction, 'is_budget', False)
     self._can_set_amount = transaction.can_set_amount
Example #7
0
def getmatches_by_contents(files, j=job.nulljob):
    """Returns a list of :class:`Match` within ``files`` if their contents is the same.

    :param j: A :ref:`job progress instance <jobs>`.
    """
    size2files = defaultdict(set)
    for f in files:
        if f.size:
            size2files[f.size].add(f)
    del files
    possible_matches = [
        files for files in size2files.values() if len(files) > 1
    ]
    del size2files
    result = []
    j.start_job(len(possible_matches), tr("0 matches found"))
    for group in possible_matches:
        for first, second in itertools.combinations(group, 2):
            if first.is_ref and second.is_ref:
                continue  # Don't spend time comparing two ref pics together.
            if first.md5partial == second.md5partial:
                if first.md5 == second.md5:
                    result.append(Match(first, second, 100))
        j.add_progress(desc=tr("%d matches found") % len(result))
    return result
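A minimal usage sketch (an editorial addition, not part of the source). getmatches_by_contents() only reads the size, is_ref, md5partial and md5 attributes of the objects it receives, so hypothetical stand-in objects are enough to exercise it:

import hashlib

class FileStub:
    # Hypothetical stand-in exposing just the attributes the function reads.
    def __init__(self, name, data, is_ref=False):
        self.name = name
        self.is_ref = is_ref
        self.size = len(data)
        self.md5partial = hashlib.md5(data[:1024]).digest()  # hash of the first chunk only
        self.md5 = hashlib.md5(data).digest()                # hash of the whole contents

files = [
    FileStub('a.bin', b'spam' * 256),
    FileStub('b.bin', b'spam' * 256),      # same contents as a.bin -> one Match at 100%
    FileStub('c.bin', b'something else'),  # unique size -> never compared
]
matches = getmatches_by_contents(files)
assert len(matches) == 1 and matches[0].percentage == 100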
Example #8
0
 def format_criterion_value(self, value):
     return {
         self.ENDS_WITH_NUMBER: tr("Ends with number"),
         self.DOESNT_END_WITH_NUMBER: tr("Doesn't end with number"),
         self.LONGEST: tr("Longest"),
         self.SHORTEST: tr("Shortest"),
     }[value]
Example #9
0
def getmatches(objects,
               min_match_percentage=0,
               match_similar_words=False,
               weight_words=False,
               no_field_order=False,
               j=job.nulljob):
    """Returns a list of :class:`Match` within ``objects`` after fuzzily matching their words.

    :param objects: List of :class:`~core.fs.File` to match.
    :param int min_match_percentage: minimum % of words that have to match.
    :param bool match_similar_words: make similar words (see :func:`merge_similar_words`) match.
    :param bool weight_words: longer words are worth more in match % computations.
    :param bool no_field_order: match :ref:`fields` regardless of their order.
    :param j: A :ref:`job progress instance <jobs>`.
    """
    COMMON_WORD_THRESHOLD = 50
    LIMIT = 5000000
    j = j.start_subjob(2)
    sj = j.start_subjob(2)
    for o in objects:
        if not hasattr(o, 'words'):
            o.words = getwords(o.name)
    word_dict = build_word_dict(objects, sj)
    reduce_common_words(word_dict, COMMON_WORD_THRESHOLD)
    if match_similar_words:
        merge_similar_words(word_dict)
    match_flags = []
    if weight_words:
        match_flags.append(WEIGHT_WORDS)
    if match_similar_words:
        match_flags.append(MATCH_SIMILAR_WORDS)
    if no_field_order:
        match_flags.append(NO_FIELD_ORDER)
    j.start_job(len(word_dict), tr("0 matches found"))
    compared = defaultdict(set)
    result = []
    try:
        # This whole 'popping' thing is there to avoid taking too much memory at the same time.
        while word_dict:
            items = word_dict.popitem()[1]
            while items:
                ref = items.pop()
                compared_already = compared[ref]
                to_compare = items - compared_already
                compared_already |= to_compare
                for other in to_compare:
                    m = get_match(ref, other, match_flags)
                    if m.percentage >= min_match_percentage:
                        result.append(m)
                        if len(result) >= LIMIT:
                            return result
            j.add_progress(desc=tr("%d matches found") % len(result))
    except MemoryError:
        # This is the place where the memory usage is at its peak during the scan.
        # Just continue the process with an incomplete list of matches.
        del compared  # This should give us enough room to call logging.
        logging.warning('Memory Overflow. Matches: %d. Word dict: %d' %
                        (len(result), len(word_dict)))
        return result
    return result
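A minimal usage sketch (an editorial addition, not part of the source). getmatches() derives a words attribute from name when it is missing, so any object with a name attribute can be matched fuzzily:

class NamedStub:
    # Hypothetical stand-in; getmatches() fills in .words via getwords() on .name.
    def __init__(self, name):
        self.name = name

objects = [
    NamedStub('Holiday Photo 001'),
    NamedStub('holiday photo 002'),  # shares most words with the first -> expected to match
    NamedStub('Tax Report 2020'),    # shares no words -> never compared
]
for m in getmatches(objects, min_match_percentage=50):
    print(m.first.name, '<->', m.second.name, m.percentage)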
Example #10
0
 def _refresh(self):
     self.clear()
     self.has_multiple_currencies = self.document.accounts.has_multiple_currencies()
     self.assets = self.make_type_node(tr('ASSETS'), AccountType.Asset)
     self.liabilities = self.make_type_node(tr('LIABILITIES'), AccountType.Liability)
     self.net_worth = self._make_node(tr('NET WORTH'))
     net_worth_start = self.assets.start_amount - self.liabilities.start_amount
     net_worth_end = self.assets.end_amount - self.liabilities.end_amount
     budget_date_range = DateRange(date.today(), self.document.date_range.end)
     # The net worth's budget is not a simple subtraction, it must count the target-less budgets
     net_worth_budgeted = self.document.budgeted_amount_for_target(None, budget_date_range)
     net_worth_delta = net_worth_end - net_worth_start
     force_explicit_currency = self.has_multiple_currencies
     self.net_worth.start = self.document.format_amount(
         net_worth_start, force_explicit_currency=force_explicit_currency
     )
     self.net_worth.end = self.document.format_amount(
         net_worth_end, force_explicit_currency=force_explicit_currency
     )
     self.net_worth.budgeted = self.document.format_amount(
         net_worth_budgeted, force_explicit_currency=force_explicit_currency
     )
     self.net_worth.delta = self.document.format_amount(
         net_worth_delta, force_explicit_currency=force_explicit_currency
     )
     self.net_worth.delta_perc = get_delta_perc(net_worth_delta, net_worth_start)
     self.net_worth.is_total = True
     self.append(self.assets)
     self.append(self.liabilities)
     self.append(self.net_worth)
Example #11
0
 def __get_stat_line(self):
     if self.__filtered_dupes is None:
         mark_count = self.mark_count
         marked_size = self.__marked_size
         total_count = self.__total_count
         total_size = self.__total_size
     else:
         mark_count = len([
             dupe for dupe in self.__filtered_dupes if self.is_marked(dupe)
         ])
         marked_size = sum(dupe.size for dupe in self.__filtered_dupes
                           if self.is_marked(dupe))
         total_count = len([
             dupe for dupe in self.__filtered_dupes
             if self.is_markable(dupe)
         ])
         total_size = sum(dupe.size for dupe in self.__filtered_dupes
                          if self.is_markable(dupe))
     if self.mark_inverted:
         marked_size = self.__total_size - marked_size
     result = tr("%d / %d (%s / %s) duplicates marked.") % (
         mark_count,
         total_count,
         format_size(marked_size, 2),
         format_size(total_size, 2),
     )
     if self.__filters:
         result += tr(" filter: %s") % ' --> '.join(self.__filters)
     return result
Example #12
0
 def get_dupe_groups(self, files, j=job.nulljob):
     j = j.start_subjob([8, 2])
     for f in (f for f in files if not hasattr(f, 'is_ref')):
         f.is_ref = False
     files = remove_dupe_paths(files)
     logging.info("Getting matches. Scan type: %d", self.scan_type)
     matches = self._getmatches(files, j)
     logging.info('Found %d matches' % len(matches))
     j.set_progress(100, tr("Removing false matches"))
     # In removing what we call here "false matches", we want to remove several things. First, if
     # we scan by folders, we remove folder matches for which the parent folder is also part of a
     # match (they're "duplicated duplicates", if you will). Then, unless the mixed file kind
     # option is enabled, we drop matches mixing file kinds; we also drop matches for which either
     # file no longer exists and, lastly, matches in which both files are ref.
     if self.scan_type == ScanType.Folders and matches:
         allpath = {m.first.path for m in matches}
         allpath |= {m.second.path for m in matches}
         sortedpaths = sorted(allpath)
         toremove = set()
         last_parent_path = sortedpaths[0]
         for p in sortedpaths[1:]:
             if p in last_parent_path:
                 toremove.add(p)
             else:
                 last_parent_path = p
         matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove]
     if not self.mix_file_kind:
         matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
     matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
     matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
     if self.ignore_list:
         j = j.start_subjob(2)
         iter_matches = j.iter_with_progress(matches, tr("Processed %d/%d matches against the ignore list"))
         matches = [
             m for m in iter_matches
             if not self.ignore_list.AreIgnored(str(m.first.path), str(m.second.path))
         ]
     logging.info('Grouping matches')
     groups = engine.get_groups(matches, j)
     matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
     if self.scan_type in {ScanType.Filename, ScanType.Fields, ScanType.FieldsNoOrder, ScanType.Tag}:
         self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
     else:
         # Ticket #195
         # To speed up the scan, we don't bother comparing contents of files that are both ref
         # files. However, this messes up "discarded" counting because there's a missing match
         # in cases where we end up with a dupe group anyway (with a non-ref file). Because it's
         # impossible to have discarded matches in exact dupe scans, we simply set it at 0, thus
         # bypassing our tricky problem.
         # Also, although ScanType.FuzzyBlock is not always doing exact comparisons, we also
         # bypass ref comparison, thus messing up with our "discarded" count. So we're
         # effectively disabling the "discarded" feature in PE, but it's better than falsely
         # reporting discarded matches.
         self.discarded_file_count = 0
     groups = [g for g in groups if any(not f.is_ref for f in g)]
     logging.info('Created %d groups' % len(groups))
     j.set_progress(100, tr("Doing group prioritization"))
     for g in groups:
         g.prioritize(self._key_func, self._tie_breaker)
     return groups
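A small illustration (an editorial addition, not part of the source) of the parent-folder pruning loop above, assuming the scanner's Path type implements "child in parent" as an "is a descendant of" test; a hypothetical tuple-based stand-in with the same semantics is used here:

class PathStub(tuple):
    # Hypothetical stand-in: "child in parent" means "child is under parent".
    def __contains__(self, item):
        return item[:len(self)] == self

sortedpaths = sorted(PathStub(p) for p in [
    ('photos', '2019'), ('photos', '2019', 'trip'), ('videos',)
])
toremove = set()
last_parent_path = sortedpaths[0]
for p in sortedpaths[1:]:
    if p in last_parent_path:  # p is under the last kept folder -> drop its matches later
        toremove.add(p)
    else:
        last_parent_path = p
print(toremove)  # only ('photos', '2019', 'trip') ends up removed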
Example #13
0
 def _refresh(self):
     self.clear()
     self.has_multiple_currencies = self.document.accounts.has_multiple_currencies()
     self.income = self.make_type_node(tr('INCOME'), AccountType.Income)
     self.expenses = self.make_type_node(tr('EXPENSES'), AccountType.Expense)
     self.net_income = self._make_node(tr('NET INCOME'))
     net_income = self.income.cash_flow_amount - self.expenses.cash_flow_amount
     last_net_income = self.income.last_cash_flow_amount - self.expenses.last_cash_flow_amount
     net_budgeted = self.income.budgeted_amount - self.expenses.budgeted_amount
     delta = net_income - last_net_income
     force_explicit_currency = self.has_multiple_currencies
     self.net_income.cash_flow = self.document.format_amount(
         net_income, force_explicit_currency=force_explicit_currency
     )
     self.net_income.last_cash_flow = self.document.format_amount(
         last_net_income, force_explicit_currency=force_explicit_currency
     )
     self.net_income.budgeted = self.document.format_amount(
         net_budgeted, force_explicit_currency=force_explicit_currency
     )
     self.net_income.delta = self.document.format_amount(
         delta, force_explicit_currency=force_explicit_currency
     )
     self.net_income.delta_perc = get_delta_perc(delta, last_net_income)
     self.net_income.is_total = True
     self.append(self.income)
     self.append(self.expenses)
     self.append(self.net_income)
Example #14
0
 def __init__(self, ref, repeat_type, repeat_every):
     if repeat_type not in RTYPE2INCFUNC:
         # invalid repeat type, default to monthly
         repeat_type = RepeatType.Monthly
     #: :class:`.Transaction`. The model transaction that's going to be regularly spawned.
     self.ref = ref
     self._repeat_type = repeat_type
     self._repeat_every = repeat_every
     #: Date at which our recurrence stops. When ``None`` (the default), it never ends.
     self.stop_date = None
     #: ``recurrent_date -> transaction`` mapping of schedule exceptions.
     self.date2exception = {}
     #: ``recurrent_date -> transaction`` mapping of *global* schedule exceptions.
     self.date2globalchange = {}
     #: ``recurrent_date -> transaction`` mapping of spawns. Used as a cache. Frequently purged.
     self.date2instances = {}
     self.rtype2desc = {
         RepeatType.Daily: tr('Daily'),
         RepeatType.Weekly: tr('Weekly'),
         RepeatType.Monthly: tr('Monthly'),
         RepeatType.Yearly: tr('Yearly'),
         RepeatType.Weekday: '', # dynamic
         RepeatType.WeekdayLast: '', # dynamic
     }
     self._update_rtype_descs()
Example #15
0
 def change_schedule(self, schedule, new_ref, repeat_type, repeat_every, stop_date):
     for split in new_ref.splits:
         if split.account is not None:
             # same as in change_transaction()
             split.account = self.accounts.find(split.account.name, split.account.type)
     if schedule in self.schedules:
         action = Action(tr('Change Schedule'))
         action.change_schedule(schedule)
     else:
         action = Action(tr('Add Schedule'))
         action.added_schedules.add(schedule)
     self._undoer.record(action)
     original = schedule.ref
     min_date = min(original.date, new_ref.date)
     original.set_splits(new_ref.splits)
     original.change(description=new_ref.description, payee=new_ref.payee,
         checkno=new_ref.checkno, notes=new_ref.notes)
     schedule.start_date = new_ref.date
     schedule.repeat_type = repeat_type
     schedule.repeat_every = repeat_every
     schedule.stop_date = stop_date
     schedule.reset_spawn_cache()
     if schedule not in self.schedules:
         self.schedules.append(schedule)
     self._cook(from_date=min_date)
     self.notify('schedule_changed')
Example #16
0
    def copy_or_move_marked(self, copy):
        """Start an async move (or copy) job on marked duplicates.
        
        :param bool copy: If True, duplicates will be copied instead of moved
        """
        def do(j):
            def op(dupe):
                j.add_progress()
                self.copy_or_move(dupe, copy, destination, desttype)

            j.start_job(self.results.mark_count)
            self.results.perform_on_marked(op, not copy)

        if not self._check_demo():
            return
        if not self.results.mark_count:
            self.view.show_message(MSG_NO_MARKED_DUPES)
            return
        opname = tr("copy") if copy else tr("move")
        prompt = tr("Select a directory to {} marked files to").format(opname)
        destination = self.view.select_dest_folder(prompt)
        if destination:
            desttype = self.options['copymove_dest_type']
            jobid = JobType.Copy if copy else JobType.Move
            self._start_job(jobid, do)
Example #17
0
 def _refresh(self):
     self.clear()
     self.has_multiple_currencies = self.document.accounts.has_multiple_currencies(
     )
     self.assets = self.make_type_node(tr('ASSETS'), AccountType.Asset)
     self.liabilities = self.make_type_node(tr('LIABILITIES'),
                                            AccountType.Liability)
     self.net_worth = self._make_node(tr('NET WORTH'))
     net_worth_start = self.assets.start_amount - self.liabilities.start_amount
     net_worth_end = self.assets.end_amount - self.liabilities.end_amount
     budget_date_range = DateRange(date.today(),
                                   self.document.date_range.end)
     # The net worth's budget is not a simple subtraction, it must count the target-less budgets
     net_worth_budgeted = self.document.budgeted_amount_for_target(
         None, budget_date_range)
     net_worth_delta = net_worth_end - net_worth_start
     force_explicit_currency = self.has_multiple_currencies
     self.net_worth.start = self.document.format_amount(
         net_worth_start, force_explicit_currency=force_explicit_currency)
     self.net_worth.end = self.document.format_amount(
         net_worth_end, force_explicit_currency=force_explicit_currency)
     self.net_worth.budgeted = self.document.format_amount(
         net_worth_budgeted,
         force_explicit_currency=force_explicit_currency)
     self.net_worth.delta = self.document.format_amount(
         net_worth_delta, force_explicit_currency=force_explicit_currency)
     self.net_worth.delta_perc = get_delta_perc(net_worth_delta,
                                                net_worth_start)
     self.net_worth.is_total = True
     self.append(self.assets)
     self.append(self.liabilities)
     self.append(self.net_worth)
Example #18
0
def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob):
    """Returns a list of :class:`Match` within ``files`` if their contents is the same.
    
    :param str sizeattr: attibute name of the :class:`~core.fs.file` that returns the size of the
                         file to use for comparison.
    :param bool partial: if true, will use the "md5partial" attribute instead of "md5" to compute
                         contents hash.
    :param j: A :ref:`job progress instance <jobs>`.
    """
    j = j.start_subjob([2, 8])
    size2files = defaultdict(set)
    for file in j.iter_with_progress(files, tr("Read size of %d/%d files")):
        filesize = getattr(file, sizeattr)
        if filesize:
            size2files[filesize].add(file)
    possible_matches = [files for files in size2files.values() if len(files) > 1]
    del size2files
    result = []
    j.start_job(len(possible_matches), tr("0 matches found"))
    for group in possible_matches:
        for first, second in itertools.combinations(group, 2):
            if first.is_ref and second.is_ref:
                continue # Don't spend time comparing two ref pics together.
            if first.md5partial == second.md5partial:
                if partial or first.md5 == second.md5:
                    result.append(Match(first, second, 100))
        j.add_progress(desc=tr("%d matches found") % len(result))
    return result
Example #19
0
 def _job_completed(self, jobid):
     if jobid == JobType.Scan:
         self._results_changed()
         if not self.results.groups:
             self.view.show_message(tr("No duplicates found."))
         else:
             self.view.show_results_window()
     if jobid in {JobType.Move, JobType.Delete}:
         self._results_changed()
     if jobid == JobType.Load:
         self._recreate_result_table()
         self._results_changed()
         self.view.show_results_window()
     if jobid in {JobType.Copy, JobType.Move, JobType.Delete}:
         if self.results.problems:
             self.problem_dialog.refresh()
             self.view.show_problem_dialog()
         else:
             msg = {
                 JobType.Copy:
                 tr("All marked files were copied successfully."),
                 JobType.Move:
                 tr("All marked files were moved successfully."),
                 JobType.Delete:
                 tr("All marked files were successfully sent to Trash."),
             }[jobid]
             self.view.show_message(msg)
Example #20
0
 def _refresh(self):
     self.clear()
     self.has_multiple_currencies = self.document.accounts.has_multiple_currencies(
     )
     self.income = self.make_type_node(tr('INCOME'), AccountType.Income)
     self.expenses = self.make_type_node(tr('EXPENSES'),
                                         AccountType.Expense)
     self.net_income = self._make_node(tr('NET INCOME'))
     net_income = self.income.cash_flow_amount - self.expenses.cash_flow_amount
     last_net_income = self.income.last_cash_flow_amount - self.expenses.last_cash_flow_amount
     net_budgeted = self.income.budgeted_amount - self.expenses.budgeted_amount
     delta = net_income - last_net_income
     force_explicit_currency = self.has_multiple_currencies
     self.net_income.cash_flow = self.document.format_amount(
         net_income, force_explicit_currency=force_explicit_currency)
     self.net_income.last_cash_flow = self.document.format_amount(
         last_net_income, force_explicit_currency=force_explicit_currency)
     self.net_income.budgeted = self.document.format_amount(
         net_budgeted, force_explicit_currency=force_explicit_currency)
     self.net_income.delta = self.document.format_amount(
         delta, force_explicit_currency=force_explicit_currency)
     self.net_income.delta_perc = get_delta_perc(delta, last_net_income)
     self.net_income.is_total = True
     self.append(self.income)
     self.append(self.expenses)
     self.append(self.net_income)
Example #21
0
def getmatches_by_contents(files, sizeattr="size", partial=False, j=job.nulljob):
    """Returns a list of :class:`Match` within ``files`` if their contents is the same.
    
    :param str sizeattr: attibute name of the :class:`~core.fs.file` that returns the size of the
                         file to use for comparison.
    :param bool partial: if true, will use the "md5partial" attribute instead of "md5" to compute
                         contents hash.
    :param j: A :ref:`job progress instance <jobs>`.
    """
    j = j.start_subjob([2, 8])
    size2files = defaultdict(set)
    for file in j.iter_with_progress(files, tr("Read size of %d/%d files")):
        filesize = getattr(file, sizeattr)
        if filesize:
            size2files[filesize].add(file)
    possible_matches = [files for files in size2files.values() if len(files) > 1]
    del size2files
    result = []
    j.start_job(len(possible_matches), tr("0 matches found"))
    for group in possible_matches:
        for first, second in itertools.combinations(group, 2):
            if first.is_ref and second.is_ref:
                continue  # Don't spend time comparing two ref pics together.
            if first.md5partial == second.md5partial:
                if partial or first.md5 == second.md5:
                    result.append(Match(first, second, 100))
        j.add_progress(desc=tr("%d matches found") % len(result))
    return result
Example #22
0
 def format_criterion_value(self, value):
     return {
         self.ENDS_WITH_NUMBER: tr("Ends with number"),
         self.DOESNT_END_WITH_NUMBER: tr("Doesn't end with number"),
         self.LONGEST: tr("Longest"),
         self.SHORTEST: tr("Shortest"),
     }[value]
Example #23
0
 def get_scan_options():
     return [
         ScanOption(ScanType.Filename, tr("Filename")),
         ScanOption(ScanType.Fields, tr("Filename - Fields")),
         ScanOption(ScanType.FieldsNoOrder, tr("Filename - Fields (No Order)")),
         ScanOption(ScanType.Tag, tr("Tags")),
         ScanOption(ScanType.Contents, tr("Contents")),
     ]
Example #24
0
 def add_directory(self, d):
     try:
         self.directories.add_path(Path(d))
         self.notify('directories_changed')
     except directories.AlreadyThereError:
         self.view.show_message(tr("'{}' already is in the list.").format(d))
     except directories.InvalidPathError:
         self.view.show_message(tr("'{}' does not exist.").format(d))
Example #25
0
 def get_scan_options():
     return [
         ScanOption(ScanType.FILENAME, tr("Filename")),
         ScanOption(ScanType.FIELDS, tr("Filename - Fields")),
         ScanOption(ScanType.FIELDSNOORDER,
                    tr("Filename - Fields (No Order)")),
         ScanOption(ScanType.TAG, tr("Tags")),
         ScanOption(ScanType.CONTENTS, tr("Contents")),
     ]
Example #26
0
 def get_scan_options():
     return [
         ScanOption(ScanType.Filename, tr("Filename")),
         ScanOption(ScanType.Fields, tr("Filename - Fields")),
         ScanOption(ScanType.FieldsNoOrder,
                    tr("Filename - Fields (No Order)")),
         ScanOption(ScanType.Tag, tr("Tags")),
         ScanOption(ScanType.Contents, tr("Contents")),
     ]
Example #27
0
 def new_group(self, type):
     name = self.groups.new_name(tr('New group'), type)
     group = Group(name, type)
     action = Action(tr('Add group'))
     action.added_groups.add(group)
     self._undoer.record(action)
     self.groups.append(group)
     self.notify('account_added')
     return group
Example #28
0
def getmatches(
    objects, min_match_percentage=0, match_similar_words=False, weight_words=False, no_field_order=False, j=job.nulljob
):
    """Returns a list of :class:`Match` within ``objects`` after fuzzily matching their words.
    
    :param objects: List of :class:`~core.fs.File` to match.
    :param int min_match_percentage: minimum % of words that have to match.
    :param bool match_similar_words: make similar words (see :func:`merge_similar_words`) match.
    :param bool weight_words: longer words are worth more in match % computations.
    :param bool no_field_order: match :ref:`fields` regardless of their order.
    :param j: A :ref:`job progress instance <jobs>`.
    """
    COMMON_WORD_THRESHOLD = 50
    LIMIT = 5000000
    j = j.start_subjob(2)
    sj = j.start_subjob(2)
    for o in objects:
        if not hasattr(o, "words"):
            o.words = getwords(o.name)
    word_dict = build_word_dict(objects, sj)
    reduce_common_words(word_dict, COMMON_WORD_THRESHOLD)
    if match_similar_words:
        merge_similar_words(word_dict)
    match_flags = []
    if weight_words:
        match_flags.append(WEIGHT_WORDS)
    if match_similar_words:
        match_flags.append(MATCH_SIMILAR_WORDS)
    if no_field_order:
        match_flags.append(NO_FIELD_ORDER)
    j.start_job(len(word_dict), tr("0 matches found"))
    compared = defaultdict(set)
    result = []
    try:
        # This whole 'popping' thing is there to avoid taking too much memory at the same time.
        while word_dict:
            items = word_dict.popitem()[1]
            while items:
                ref = items.pop()
                compared_already = compared[ref]
                to_compare = items - compared_already
                compared_already |= to_compare
                for other in to_compare:
                    m = get_match(ref, other, match_flags)
                    if m.percentage >= min_match_percentage:
                        result.append(m)
                        if len(result) >= LIMIT:
                            return result
            j.add_progress(desc=tr("%d matches found") % len(result))
    except MemoryError:
        # This is the place where the memory usage is at its peak during the scan.
        # Just continue the process with an incomplete list of matches.
        del compared  # This should give us enough room to call logging.
        logging.warning("Memory Overflow. Matches: %d. Word dict: %d" % (len(result), len(word_dict)))
        return result
    return result
Example #29
0
 def new_account(self, type, group):
     name = self.accounts.new_name(tr('New account'))
     account = Account(name, self.default_currency, type)
     account.group = group
     action = Action(tr('Add account'))
     action.added_accounts.add(account)
     self._undoer.record(action)
     self.accounts.add(account)
     self.notify('account_added')
     return account
Example #30
0
    def export_to_csv(self):
        """Export current results to CSV.

        The columns and their order in the resulting CSV file are determined in the same way as in
        :meth:`export_to_xhtml`.
        """
        dest_file = self.view.select_dest_file(tr("Select a destination for your exported CSV"), 'csv')
        if dest_file:
            colnames, rows = self._get_export_data()
            try:
                export.export_to_csv(dest_file, colnames, rows)
            except OSError as e:
                self.view.show_message(tr("Couldn't write to file: {}").format(str(e)))
Example #31
0
 def _get_action_from_changed_transactions(self, transactions, global_scope=False):
     if len(transactions) == 1 and not isinstance(transactions[0], Spawn) \
             and transactions[0] not in self.transactions:
         action = Action(tr('Add transaction'))
         action.added_transactions.add(transactions[0])
     else:
         action = Action(tr('Change transaction'))
         action.change_transactions(transactions)
     if global_scope:
         spawns, txns = extract(lambda x: isinstance(x, Spawn), transactions)
         for schedule in {spawn.recurrence for spawn in spawns}:
             action.change_schedule(schedule)
     return action
Example #32
0
 def add_directory(self, d):
     """Adds folder ``d`` to :attr:`directories`.
     
     Shows an error message dialog if something bad happens.
     
     :param str d: path of folder to add
     """
     try:
         self.directories.add_path(Path(d))
         self.notify('directories_changed')
     except directories.AlreadyThereError:
         self.view.show_message(tr("'{}' already is in the list.").format(d))
     except directories.InvalidPathError:
         self.view.show_message(tr("'{}' does not exist.").format(d))
Example #33
0
    def add_directory(self, d):
        """Adds folder ``d`` to :attr:`directories`.

        Shows an error message dialog if something bad happens.

        :param str d: path of folder to add
        """
        try:
            self.directories.add_path(Path(d))
            self.notify("directories_changed")
        except directories.AlreadyThereError:
            self.view.show_message(tr("'{}' already is in the list.").format(d))
        except directories.InvalidPathError:
            self.view.show_message(tr("'{}' does not exist.").format(d))
Example #34
0
    def export_to_csv(self):
        """Export current results to CSV.

        The columns and their order in the resulting CSV file are determined in the same way as in
        :meth:`export_to_xhtml`.
        """
        dest_file = self.view.select_dest_file(
            tr("Select a destination for your exported CSV"), 'csv')
        if dest_file:
            colnames, rows = self._get_export_data()
            try:
                export.export_to_csv(dest_file, colnames, rows)
            except OSError as e:
                self.view.show_message(
                    tr("Couldn't write to file: {}").format(str(e)))
Example #35
0
 def _job_error(self, jobid, err):
     if jobid == JobType.Load:
         msg = tr("Could not load file: {}").format(err)
         self.view.show_message(msg)
         return False
     else:
         raise err
Example #36
0
 def __init__(self, document):
     DocumentGUIObject.__init__(self, document)
     self._selected_pane_index = 0
     self._selected_target_index = 0
     def setfunc(index):
         self.view.set_swap_button_enabled(self.can_perform_swap())
     self.swap_type_list = LinkedSelectableList(items=[
         "<placeholder> Day <--> Month",
         "<placeholder> Month <--> Year",
         "<placeholder> Day <--> Year",
         tr("Description <--> Payee"),
         tr("Invert Amounts"),
     ], setfunc=setfunc)
     self.swap_type_list.selected_index = SwapType.DayMonth
     self.panes = []
     self.import_table = ImportTable(self)
Example #37
0
 def __init__(self, table, account, date, total_debit, total_credit):
     super(TotalRow, self).__init__(table, account)
     self._date = date
     self._description = tr('TOTAL')
     # don't touch _increase and _decrease, they trigger editing.
     self._debit_fmt = table.document.format_amount(total_debit, blank_zero=True)
     self._credit_fmt = table.document.format_amount(total_credit, blank_zero=True)
     delta = total_debit - total_credit
     if delta:
         if account.is_credit_account():
             delta *= -1
         positive = delta > 0
          # format_amount doesn't explicitly put a positive sign, so we have to add it ourselves.
          # However, if the delta is in a foreign currency, we want the sign to be in front of the
          # amount, not in front of the currency code.
         delta_fmt = table.document.format_amount(abs(delta))
         sign = '+' if positive else '-'
         if delta_fmt[0].isdigit():
             delta_fmt = sign + delta_fmt
         else:
              # we have a currency code in front of our amount; a little trick is to replace the
              # only space character we have with "space + sign"
             delta_fmt = delta_fmt.replace(' ', ' ' + sign)
         self._balance_fmt = delta_fmt
     else:
         self._balance_fmt = ''
     self.is_bold = True
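A small illustration (an editorial addition, not part of the source) of the sign-placement trick described in the comments above, assuming a foreign-currency amount is formatted as a currency code, a single space, then the number:

delta_fmt = 'CAD 42.00'  # hypothetical output of format_amount(abs(delta))
sign = '+'
if delta_fmt[0].isdigit():
    delta_fmt = sign + delta_fmt
else:
    # currency code comes first: put the sign after the space, right before the number
    delta_fmt = delta_fmt.replace(' ', ' ' + sign)
print(delta_fmt)  # -> 'CAD +42.00'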
Example #38
0
 def _load(self):
     lines = self.lines[:]
     colcount = len(lines[0]) if lines else 0
     columns = self.columns[:colcount]
     self._merge_columns(columns, lines)
     ci = {}
     for index, field in enumerate(columns):
         if field is not None:
             ci[field] = index
     hasdate = CsvField.Date in ci
     hasamount = (CsvField.Amount in ci) or (CsvField.Increase in ci and CsvField.Decrease in ci)
     if not (hasdate and hasamount):
         raise FileLoadError(tr("The Date and Amount columns must be set."))
     self.account_info.name = 'CSV Import'
     self.parsing_date_format, lines_to_load = self._parse_date_format(lines, ci)
     self._check_amount_values(lines_to_load, ci)
     for line in lines_to_load:
         self.start_transaction()
         for attr, index in ci.items():
             value = line[index]
             if attr == CsvField.Date:
                 value = self.parse_date_str(value, self.parsing_date_format)
             elif attr == CsvField.Increase:
                 attr = CsvField.Amount
             elif attr == CsvField.Decrease:
                 attr = CsvField.Amount
                 if value.strip() and not value.startswith('-'):
                     value = '-' + value
             if isinstance(value, str):
                 value = value.strip()
             if value:
                 setattr(self.transaction_info, attr, value)
Example #39
0
 def _load(self):
     lines = self.lines[:]
     colcount = len(lines[0]) if lines else 0
     columns = self.columns[:colcount]
     self._merge_columns(columns, lines)
     ci = {}
     for index, field in enumerate(columns):
         if field is not None:
             ci[field] = index
     hasdate = CsvField.Date in ci
     hasamount = (CsvField.Amount in ci) or (CsvField.Increase in ci and CsvField.Decrease in ci)
     if not (hasdate and hasamount):
         raise FileLoadError(tr("The Date and Amount columns must be set."))
     self.account_info.name = 'CSV Import'
     self.parsing_date_format, lines_to_load = self._parse_date_format(lines, ci)
     self._check_amount_values(lines_to_load, ci)
     for line in lines_to_load:
         self.start_transaction()
         for attr, index in ci.items():
             value = line[index]
             if attr == CsvField.Date:
                 value = self.parse_date_str(value, self.parsing_date_format)
             elif attr == CsvField.Increase:
                 attr = CsvField.Amount
             elif attr == CsvField.Decrease:
                 attr = CsvField.Amount
                 if value.strip() and not value.startswith('-'):
                     value = '-' + value
             if isinstance(value, str):
                 value = value.strip()
             if value:
                 setattr(self.transaction_info, attr, value)
Example #40
0
 def toggle_entries_reconciled(self, entries):
     """Toggle the reconcile flag of `entries`.
     """
     if not entries:
         return
     all_reconciled = not entries or all(entry.reconciled for entry in entries)
     newvalue = not all_reconciled
     action = Action(tr('Change reconciliation'))
     action.change_splits([e.split for e in entries])
     min_date = min(entry.date for entry in entries)
     splits = [entry.split for entry in entries]
     spawns, splits = extract(lambda s: isinstance(s.transaction, Spawn), splits)
     for spawn in spawns:
         action.change_schedule(spawn.transaction.recurrence)
     self._undoer.record(action)
     if newvalue:
         for split in splits:
             split.reconciliation_date = split.transaction.date
         for spawn in spawns:
             #XXX update transaction selection
             materialized_split = self._reconcile_spawn_split(spawn, spawn.transaction.date)
             action.added_transactions.add(materialized_split.transaction)
     else:
         for split in splits:
             split.reconciliation_date = None
     self._cook(from_date=min_date)
     self.notify('transaction_changed')
Example #41
0
    def parse_file_for_import(self, filename):
        """Parses ``filename`` in preparation for importing.

        Opens and parses ``filename`` and tries to determine its format by successively trying to
        read it as a moneyGuru file, an OFX, a QIF and finally a CSV. Once parsed, takes the
        appropriate action for the file, which is either to show the CSV options window or to call
        :meth:`load_parsed_file_for_import`.
        """
        default_date_format = DateFormat(self.app.date_format).sys_format
        for loaderclass in (native.Loader, ofx.Loader, qif.Loader, csv.Loader):
            try:
                loader = loaderclass(self.document.default_currency,
                                     default_date_format=default_date_format)
                loader.parse(filename)
                break
            except FileFormatError:
                pass
        else:
            # No file fitted
            raise FileFormatError(tr('%s is of an unknown format.') % filename)
        self.loader = loader
        if isinstance(self.loader, csv.Loader):
            self.csv_options.show()
        else:
            self.load_parsed_file_for_import()
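A minimal sketch (an editorial addition, not part of the source) of the same for/else detection pattern with hypothetical parser callables; the else clause runs only when every candidate parser raised FileFormatError:

def detect_format(filename, parsers):
    # parsers: ordered iterable of callables that raise FileFormatError on mismatch
    for parse in parsers:
        try:
            result = parse(filename)
            break
        except FileFormatError:
            pass
    else:
        # no parser fitted the file
        raise FileFormatError('%s is of an unknown format.' % filename)
    return result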
Example #42
0
 def load_from_xml(self, filename):
     loader = native.Loader(self.default_currency)
     try:
         loader.parse(filename)
     except FileFormatError:
         raise FileFormatError(tr('"%s" is not a moneyGuru file') % filename)
     loader.load()
     self._clear()
     self._document_id = loader.document_id
     for propname in self._properties:
         if propname in loader.properties:
             self._properties[propname] = loader.properties[propname]
     for group in loader.groups:
         self.groups.append(group)
     for account in loader.accounts:
         self.accounts.add(account)
     for transaction in loader.transactions:
         self.transactions.add(transaction, position=transaction.position)
     for recurrence in loader.schedules:
         self.schedules.append(recurrence)
     for budget in loader.budgets:
         self.budgets.append(budget)
     self.accounts.default_currency = self.default_currency
     self._cook()
     self._restore_preferences_after_load()
     self.notify('document_changed')
     self._undoer.set_save_point()
     self._refresh_date_range()
Example #43
0
 def _refresh_totals(self):
     selected, total, total_debit, total_credit = self.gltable.get_totals()
     total_debit_fmt = self.document.format_amount(total_debit)
     total_credit_fmt = self.document.format_amount(total_credit)
     msg = tr("{0} out of {1} selected. Debit: {2} Credit: {3}")
     self.status_line = msg.format(selected, total, total_debit_fmt,
                                   total_credit_fmt)
Example #44
0
 def invoke_custom_command(self):
     """Calls command in ``CustomCommand`` pref with ``%d`` and ``%r`` placeholders replaced.
     
     Using the current selection, ``%d`` is replaced with the currently selected dupe and ``%r``
     is replaced with that dupe's ref file. If there's no selection, the command is not invoked.
     If the dupe is a ref, ``%d`` and ``%r`` will be the same.
     """
     cmd = self.view.get_default('CustomCommand')
     if not cmd:
         msg = tr("You have no custom command set up. Set it up in your preferences.")
         self.view.show_message(msg)
         return
     if not self.selected_dupes:
         return
     dupe = self.selected_dupes[0]
     group = self.results.get_group_of_duplicate(dupe)
     ref = group.ref
     cmd = cmd.replace('%d', str(dupe.path))
     cmd = cmd.replace('%r', str(ref.path))
     match = re.match(r'"([^"]+)"(.*)', cmd)
     if match is not None:
          # This code here is because subprocess.Popen doesn't seem to accept, under Windows,
          # executable paths with spaces in them, *even* when they're enclosed in "". So this is
          # a workaround to make the damn thing work.
         exepath, args = match.groups()
         path, exename = op.split(exepath)
         subprocess.Popen(exename + args, shell=True, cwd=path)
     else:
         subprocess.Popen(cmd, shell=True)
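A small illustration (an editorial addition, not part of the source) of the quoted-executable workaround above, with hypothetical paths:

import os.path as op
import re

cmd = '"/usr/bin/my diff tool" /tmp/dupe.jpg /tmp/ref.jpg'
match = re.match(r'"([^"]+)"(.*)', cmd)
exepath, args = match.groups()     # '/usr/bin/my diff tool', ' /tmp/dupe.jpg /tmp/ref.jpg'
path, exename = op.split(exepath)  # '/usr/bin', 'my diff tool'
print(exename + args)              # what gets passed to Popen, with cwd=path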
Example #45
0
 def _start_job(self, jobid, func, args=()):
     title = JOBID2TITLE[jobid]
     try:
         self.progress_window.run(jobid, title, func, args=args) 
     except job.JobInProgressError:
         msg = tr("A previous action is still hanging in there. You can't start a new one yet. Wait a few seconds, then try again.")
         self.view.show_message(msg)
Example #46
0
 def delete_accounts(self, accounts, reassign_to=None):
     action = Action(tr('Remove account'))
     accounts = set(accounts)
     action.delete_accounts(accounts)
     affected_schedules = [s for s in self.schedules if accounts & s.affected_accounts()]
     for schedule in affected_schedules:
         action.change_schedule(schedule)
     for account in accounts:
         affected_budgets = [b for b in self.budgets if b.account is account or b.target is account]
         if account.is_income_statement_account() and reassign_to is None:
             action.deleted_budgets |= set(affected_budgets)
         else:
             for budget in affected_budgets:
                 action.change_budget(budget)
     self._undoer.record(action)
     for account in accounts:
         self.transactions.reassign_account(account, reassign_to)
         for schedule in affected_schedules:
             schedule.reassign_account(account, reassign_to)
         for budget in affected_budgets:
             if budget.account is account:
                 if reassign_to is None:
                     self.budgets.remove(budget)
                 else:
                     budget.account = reassign_to
             elif budget.target is account:
                 budget.target = reassign_to
         self.accounts.remove(account)
     self._cook()
     self.notify('account_deleted')
Example #47
0
    def invoke_custom_command(self):
        """Calls command in ``CustomCommand`` pref with ``%d`` and ``%r`` placeholders replaced.

        Using the current selection, ``%d`` is replaced with the currently selected dupe and ``%r``
        is replaced with that dupe's ref file. If there's no selection, the command is not invoked.
        If the dupe is a ref, ``%d`` and ``%r`` will be the same.
        """
        cmd = self.view.get_default('CustomCommand')
        if not cmd:
            msg = tr(
                "You have no custom command set up. Set it up in your preferences."
            )
            self.view.show_message(msg)
            return
        if not self.selected_dupes:
            return
        dupe = self.selected_dupes[0]
        group = self.results.get_group_of_duplicate(dupe)
        ref = group.ref
        cmd = cmd.replace('%d', str(dupe.path))
        cmd = cmd.replace('%r', str(ref.path))
        match = re.match(r'"([^"]+)"(.*)', cmd)
        if match is not None:
            # This code here is because subprocess.Popen doesn't seem to accept, under Windows,
            # executable paths with spaces in them, *even* when they're enclosed in "". So this is
            # a workaround to make the damn thing work.
            exepath, args = match.groups()
            path, exename = op.split(exepath)
            subprocess.Popen(exename + args, shell=True, cwd=path)
        else:
            subprocess.Popen(cmd, shell=True)
Example #48
0
 def split_values(self, row_index, split_row_index):
     splits = self._get_splits_at_row(row_index)
     split = splits[split_row_index]
     account_name = split.account.name if split.account is not None else tr(
         'Unassigned')
     return SplitValues(account_name, split.memo,
                        self.document.format_amount(split.amount))
Example #49
0
 def _job_error(self, jobid, err):
     if jobid == JobType.Load:
         msg = tr("Could not load file: {}").format(err)
         self.view.show_message(msg)
         return False
     else:
         raise err
Example #50
0
    def start_scanning(self):
        """Starts an async job to scan for duplicates.

        Scans folders selected in :attr:`directories` and puts the results in :attr:`results`.
        """
        scanner = self.SCANNER_CLASS()
        if not self.directories.has_any_file():
            self.view.show_message(tr("The selected directories contain no scannable file."))
            return
        # Send relevant options down to the scanner instance
        for k, v in self.options.items():
            if hasattr(scanner, k):
                setattr(scanner, k, v)
        self.results.groups = []
        self._results_changed()

        def do(j):
            j.set_progress(0, tr("Collecting files to scan"))
            if scanner.scan_type == ScanType.Folders:
                files = list(self.directories.get_folders(folderclass=self.folderclass, j=j))
            else:
                files = list(self.directories.get_files(fileclasses=self.fileclasses, j=j))
            if self.options['ignore_hardlink_matches']:
                files = self._remove_hardlink_dupes(files)
            logging.info('Scanning %d files' % len(files))
            self.results.groups = scanner.get_dupe_groups(files, self.ignore_list, j)
            self.discarded_file_count = scanner.discarded_file_count

        self._start_job(JobType.Scan, do)
Example #51
0
 def clear(self):
     if not self.ignore_list:
         return
     msg = tr("Do you really want to remove all %d items from the ignore list?") % len(self.ignore_list)
     if self.app.view.ask_yes_no(msg):
         self.ignore_list.Clear()
         self.refresh()
Example #52
0
 def show(self):
     self._default_layout = Layout(tr('Default'))
     self.layout = self._default_layout
     self._refresh_columns()
     self._refresh_lines()
     self._refresh_targets()
     self.view.refresh_layout_menu()
     self.view.show()
Example #53
0
 def _check_demo(self):
     if self.should_apply_demo_limitation and self.results.mark_count > 10:
         msg = tr(
             "You cannot delete, move or copy more than 10 duplicates at once in demo mode."
         )
         self.view.show_message(msg)
         return False
     return True
Example #54
0
class SwapDescriptionPayeeAction(BaseSwapFields):

    NAME = "Swap Description Payee Import Action"
    ACTION_NAME = tr("Description <--> Payee")
    PRIORITY = 4

    def _switch_function(self, txn):
        txn.description, txn.payee = txn.payee, txn.description
Example #55
0
 def _refresh_totals(self):
     selected = len(self.mainwindow.selected_transactions)
     total = len(self.visible_transactions)
     currency = self.document.default_currency
     total_amount = sum(convert_amount(t.amount, currency, t.date) for t in self.mainwindow.selected_transactions)
     total_amount_fmt = self.document.format_amount(total_amount)
     msg = tr("{0} out of {1} selected. Amount: {2}")
     self.status_line = msg.format(selected, total, total_amount_fmt)
Example #56
0
 def get_dupe_groups(self, files, ignore_list=None, j=job.nulljob):
     for f in (f for f in files if not hasattr(f, "is_ref")):
         f.is_ref = False
     files = remove_dupe_paths(files)
     logging.info("Getting matches. Scan type: %d", self.scan_type)
     matches = self._getmatches(files, j)
     logging.info("Found %d matches" % len(matches))
     j.set_progress(100, tr("Almost done! Fiddling with results..."))
     # In removing what we call here "false matches", we want to remove several things. First, if
     # we scan by folders, we remove folder matches for which the parent folder is also part of a
     # match (they're "duplicated duplicates", if you will). Then, unless the mixed file kind
     # option is enabled, we drop matches mixing file kinds; we also drop matches for which either
     # file no longer exists and, lastly, matches in which both files are ref.
     if self.scan_type == ScanType.FOLDERS and matches:
         allpath = {m.first.path for m in matches}
         allpath |= {m.second.path for m in matches}
         sortedpaths = sorted(allpath)
         toremove = set()
         last_parent_path = sortedpaths[0]
         for p in sortedpaths[1:]:
             if p in last_parent_path:
                 toremove.add(p)
             else:
                 last_parent_path = p
         matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove]
     if not self.mix_file_kind:
         matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
     matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
     matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
     if ignore_list:
         matches = [m for m in matches if not ignore_list.are_ignored(str(m.first.path), str(m.second.path))]
     logging.info("Grouping matches")
     groups = engine.get_groups(matches)
     if self.scan_type in {
         ScanType.FILENAME,
         ScanType.FIELDS,
         ScanType.FIELDSNOORDER,
         ScanType.TAG,
     }:
         matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
         self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
     else:
         # Ticket #195
         # To speed up the scan, we don't bother comparing contents of files that are both ref
         # files. However, this messes up "discarded" counting because there's a missing match
         # in cases where we end up with a dupe group anyway (with a non-ref file). Because it's
         # impossible to have discarded matches in exact dupe scans, we simply set it at 0, thus
         # bypassing our tricky problem.
         # Also, although ScanType.FuzzyBlock is not always doing exact comparisons, we also
         # bypass ref comparison, thus messing up with our "discarded" count. So we're
         # effectively disabling the "discarded" feature in PE, but it's better than falsely
         # reporting discarded matches.
         self.discarded_file_count = 0
     groups = [g for g in groups if any(not f.is_ref for f in g)]
     logging.info("Created %d groups" % len(groups))
     for g in groups:
         g.prioritize(self._key_func, self._tie_breaker)
     return groups
Example #57
0
 def reprioritize_groups(self, sort_key):
     count = 0
     for group in self.results.groups:
         if group.prioritize(key_func=sort_key):
             count += 1
     self._results_changed()
     msg = tr("{} duplicate groups were changed by the re-prioritization."
              ).format(count)
     self.view.show_message(msg)
Example #58
0
 def _start_job(self, jobid, func, args=()):
     title = JOBID2TITLE[jobid]
     try:
         self.progress_window.run(jobid, title, func, args=args)
     except job.JobInProgressError:
         msg = tr(
             "A previous action is still hanging in there. You can't start a new one yet. Wait "
             "a few seconds, then try again.")
         self.view.show_message(msg)
Example #59
0
 def clear(self):
     if not self.ignore_list:
         return
     msg = tr(
         "Do you really want to remove all %d items from the ignore list?"
     ) % len(self.ignore_list)
     if self.app.view.ask_yes_no(msg):
         self.ignore_list.Clear()
         self.refresh()