def _job_completed(self, jobid):
    """React to the completion of an async job, updating the UI accordingly."""
    if jobid == JobType.SCAN:
        self._results_changed()
        fs.filesdb.commit()
        if self.results.groups:
            self.view.show_results_window()
        else:
            self.view.show_message(tr("No duplicates found."))
    if jobid in {JobType.MOVE, JobType.DELETE}:
        self._results_changed()
    if jobid == JobType.LOAD:
        self._recreate_result_table()
        self._results_changed()
        self.view.show_results_window()
    if jobid in {JobType.COPY, JobType.MOVE, JobType.DELETE}:
        if self.results.problems:
            self.problem_dialog.refresh()
            self.view.show_problem_dialog()
        else:
            # Pick the success message matching the finished operation.
            if jobid == JobType.COPY:
                msg = tr("All marked files were copied successfully.")
            elif jobid == JobType.MOVE:
                msg = tr("All marked files were moved successfully.")
            elif jobid == JobType.DELETE and self.deletion_options.direct:
                msg = tr("All marked files were deleted successfully.")
            else:
                msg = tr("All marked files were successfully sent to Trash.")
            self.view.show_message(msg)
def copy_or_move_marked(self, copy):
    """Start an async move (or copy) job on marked duplicates.

    :param bool copy: If True, duplicates will be copied instead of moved
    """
    def run(j):
        def process_one(dupe):
            j.add_progress()
            self.copy_or_move(dupe, copy, destination, desttype)

        j.start_job(self.results.mark_count)
        # When moving, marked dupes are removed from results afterwards.
        self.results.perform_on_marked(process_one, not copy)

    if not self._check_demo():
        return
    if not self.results.mark_count:
        self.view.show_message(MSG_NO_MARKED_DUPES)
        return
    opname = tr("copy") if copy else tr("move")
    prompt = tr("Select a directory to {} marked files to").format(opname)
    destination = self.view.select_dest_folder(prompt)
    if destination:
        desttype = self.options['copymove_dest_type']
        jobid = JobType.Copy if copy else JobType.Move
        self._start_job(jobid, run)
def load(self):
    """Populate the row's cached display attributes from its schedule.

    Reads the schedule's reference transaction for the textual fields and
    computes the total "to" amount, coercing currencies when they differ.
    """
    schedule = self.schedule
    txn = schedule.ref
    self._start_date = txn.date
    self._start_date_fmt = self.table.document.app.format_date(self._start_date)
    self._stop_date = schedule.stop_date
    if self._stop_date is not None:
        self._stop_date_fmt = self.table.document.app.format_date(self._stop_date)
    else:
        self._stop_date_fmt = ''
    self._repeat_type = schedule.repeat_type_desc
    self._interval = str(schedule.repeat_every)
    self._description = txn.description
    self._payee = txn.payee
    self._checkno = txn.checkno
    # NOTE: the original also bound ``txn.splits`` to an unused local; removed.
    froms, tos = txn.splitted_splits()
    self._from_count = len(froms)
    self._to_count = len(tos)
    # With multiple splits on a side, unassigned accounts are shown with a
    # localized placeholder; with a single split, an empty string is used.
    UNASSIGNED = tr('Unassigned') if len(froms) > 1 else ''
    self._from = ', '.join(
        s.account.name if s.account is not None else UNASSIGNED for s in froms
    )
    UNASSIGNED = tr('Unassigned') if len(tos) > 1 else ''
    self._to = ', '.join(
        s.account.name if s.account is not None else UNASSIGNED for s in tos
    )
    try:
        self._amount = sum(s.amount for s in tos)
    except ValueError:  # currency coercing problem
        currency = self.document.default_currency
        self._amount = sum(
            convert_amount(s.amount, currency, s.transaction.date) for s in tos
        )
    self._amount_fmt = self.document.format_amount(self._amount)
def load(self):
    """Fill the row's cached attributes from the underlying transaction."""
    transaction = self.transaction
    self._load_from_fields(transaction, self.FIELDS)
    self._date_fmt = None
    self._position = transaction.position
    splits = transaction.splits
    froms, tos = self.transaction.splitted_splits()
    self._from_count = len(froms)
    self._to_count = len(tos)
    # On either side, accountless splits display a localized "Unassigned"
    # placeholder only when there is more than one split.
    UNASSIGNED = tr('Unassigned') if len(froms) > 1 else ''
    get_display = lambda s: s.account.combined_display if s.account is not None else UNASSIGNED
    self._from = ', '.join(map(get_display, froms))
    UNASSIGNED = tr('Unassigned') if len(tos) > 1 else ''
    get_display = lambda s: s.account.combined_display if s.account is not None else UNASSIGNED
    self._to = ', '.join(map(get_display, tos))
    self._amount = transaction.amount
    self._amount_fmt = None
    self._mtime = datetime.datetime.fromtimestamp(transaction.mtime)
    if transaction.mtime > 0:
        self._mtime_fmt = self._mtime.strftime('%Y/%m/%d %H:%M')
    else:
        self._mtime_fmt = ''
    self._recurrent = isinstance(transaction, Spawn)
    self._reconciled = any(split.reconciled for split in splits)
    self._is_budget = getattr(transaction, 'is_budget', False)
    self._can_set_amount = transaction.can_set_amount
def _getmatches(self, files, j):
    """Compute raw matches among ``files`` according to the current scan type."""
    needs_sizes = self.size_threshold or self.scan_type in {ScanType.Contents, ScanType.Folders}
    if needs_sizes:
        j = j.start_subjob([2, 8])
        for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
            # Pre-read size; makes a smoother progress if read here (especially for bundles).
            f.size
    if self.size_threshold:
        files = [f for f in files if f.size >= self.size_threshold]
    if self.scan_type in {ScanType.Contents, ScanType.Folders}:
        return engine.getmatches_by_contents(files, j=j)
    j = j.start_subjob([2, 8])
    kw = {
        'match_similar_words': self.match_similar_words,
        'weight_words': self.word_weighting,
        'min_match_percentage': self.min_match_percentage,
    }
    if self.scan_type == ScanType.FieldsNoOrder:
        # FieldsNoOrder is just Fields with the no_field_order flag set.
        self.scan_type = ScanType.Fields
        kw['no_field_order'] = True
    extractors = {
        ScanType.Filename: lambda f: engine.getwords(rem_file_ext(f.name)),
        ScanType.Fields: lambda f: engine.getfields(rem_file_ext(f.name)),
        ScanType.Tag: lambda f: [
            engine.getwords(str(getattr(f, attrname)))
            for attrname in SCANNABLE_TAGS
            if attrname in self.scanned_tags
        ],
    }
    extract_words = extractors[self.scan_type]
    for f in j.iter_with_progress(files, tr("Read metadata of %d/%d files")):
        logging.debug("Reading metadata of %s", f.path)
        f.words = extract_words(f)
    return engine.getmatches(files, j=j, **kw)
def getmatches_by_contents(files, j=job.nulljob):
    """Returns a list of :class:`Match` within ``files`` if their contents is the same.

    :param j: A :ref:`job progress instance <jobs>`.
    """
    # Bucket files by size: only same-sized files can have identical contents.
    size2files = defaultdict(set)
    for f in files:
        if f.size:
            size2files[f.size].add(f)
    del files
    possible_matches = [bucket for bucket in size2files.values() if len(bucket) > 1]
    del size2files
    result = []
    j.start_job(len(possible_matches), tr("0 matches found"))
    for group in possible_matches:
        for first, second in itertools.combinations(group, 2):
            if first.is_ref and second.is_ref:
                continue  # Don't spend time comparing two ref pics together.
            if first.md5partial == second.md5partial and first.md5 == second.md5:
                result.append(Match(first, second, 100))
        j.add_progress(desc=tr("%d matches found") % len(result))
    return result
def format_criterion_value(self, value):
    """Return the localized display label for criterion ``value``."""
    labels = {
        self.ENDS_WITH_NUMBER: tr("Ends with number"),
        self.DOESNT_END_WITH_NUMBER: tr("Doesn't end with number"),
        self.LONGEST: tr("Longest"),
        self.SHORTEST: tr("Shortest"),
    }
    return labels[value]
def getmatches(objects, min_match_percentage=0, match_similar_words=False,
               weight_words=False, no_field_order=False, j=job.nulljob):
    """Returns a list of :class:`Match` within ``objects`` after fuzzily matching their words.

    :param objects: List of :class:`~core.fs.File` to match.
    :param int min_match_percentage: minimum % of words that have to match.
    :param bool match_similar_words: make similar words (see :func:`merge_similar_words`) match.
    :param bool weight_words: longer words are worth more in match % computations.
    :param bool no_field_order: match :ref:`fields` regardless of their order.
    :param j: A :ref:`job progress instance <jobs>`.
    """
    COMMON_WORD_THRESHOLD = 50
    LIMIT = 5000000
    j = j.start_subjob(2)
    sj = j.start_subjob(2)
    for o in objects:
        if not hasattr(o, 'words'):
            o.words = getwords(o.name)
    word_dict = build_word_dict(objects, sj)
    reduce_common_words(word_dict, COMMON_WORD_THRESHOLD)
    if match_similar_words:
        merge_similar_words(word_dict)
    match_flags = []
    if weight_words:
        match_flags.append(WEIGHT_WORDS)
    if match_similar_words:
        match_flags.append(MATCH_SIMILAR_WORDS)
    if no_field_order:
        match_flags.append(NO_FIELD_ORDER)
    j.start_job(len(word_dict), tr("0 matches found"))
    compared = defaultdict(set)
    result = []
    try:
        # This whole 'popping' thing is there to avoid taking too much memory
        # at the same time.
        while word_dict:
            items = word_dict.popitem()[1]
            while items:
                ref = items.pop()
                compared_already = compared[ref]
                to_compare = items - compared_already
                compared_already |= to_compare
                for other in to_compare:
                    m = get_match(ref, other, match_flags)
                    if m.percentage >= min_match_percentage:
                        result.append(m)
                        if len(result) >= LIMIT:
                            return result
            j.add_progress(desc=tr("%d matches found") % len(result))
    except MemoryError:
        # This is the place where the memory usage is at its peak during the scan.
        # Just continue the process with an incomplete list of matches.
        del compared  # This should give us enough room to call logging.
        logging.warning('Memory Overflow. Matches: %d. Word dict: %d' % (len(result), len(word_dict)))
        return result
    return result
def _refresh(self):
    """Rebuild the net-worth sheet: asset/liability nodes plus a NET WORTH total."""
    self.clear()
    self.has_multiple_currencies = self.document.accounts.has_multiple_currencies()
    self.assets = self.make_type_node(tr('ASSETS'), AccountType.Asset)
    self.liabilities = self.make_type_node(tr('LIABILITIES'), AccountType.Liability)
    self.net_worth = self._make_node(tr('NET WORTH'))
    net_worth_start = self.assets.start_amount - self.liabilities.start_amount
    net_worth_end = self.assets.end_amount - self.liabilities.end_amount
    budget_date_range = DateRange(date.today(), self.document.date_range.end)
    # The net worth's budget is not a simple subtraction, it must count the target-less budgets
    net_worth_budgeted = self.document.budgeted_amount_for_target(None, budget_date_range)
    net_worth_delta = net_worth_end - net_worth_start
    explicit = self.has_multiple_currencies
    fmt = lambda amount: self.document.format_amount(amount, force_explicit_currency=explicit)
    self.net_worth.start = fmt(net_worth_start)
    self.net_worth.end = fmt(net_worth_end)
    self.net_worth.budgeted = fmt(net_worth_budgeted)
    self.net_worth.delta = fmt(net_worth_delta)
    self.net_worth.delta_perc = get_delta_perc(net_worth_delta, net_worth_start)
    self.net_worth.is_total = True
    self.append(self.assets)
    self.append(self.liabilities)
    self.append(self.net_worth)
def __get_stat_line(self):
    """Build the status line describing marked/markable counts and sizes."""
    if self.__filtered_dupes is None:
        mark_count = self.mark_count
        marked_size = self.__marked_size
        total_count = self.__total_count
        total_size = self.__total_size
    else:
        # With an active filter, recount over the filtered subset only.
        marked = [dupe for dupe in self.__filtered_dupes if self.is_marked(dupe)]
        markable = [dupe for dupe in self.__filtered_dupes if self.is_markable(dupe)]
        mark_count = len(marked)
        marked_size = sum(dupe.size for dupe in marked)
        total_count = len(markable)
        total_size = sum(dupe.size for dupe in markable)
    if self.mark_inverted:
        marked_size = self.__total_size - marked_size
    result = tr("%d / %d (%s / %s) duplicates marked.") % (
        mark_count,
        total_count,
        format_size(marked_size, 2),
        format_size(total_size, 2),
    )
    if self.__filters:
        result += tr(" filter: %s") % ' --> '.join(self.__filters)
    return result
def get_dupe_groups(self, files, j=job.nulljob):
    """Scan ``files`` for duplicates and return a list of prioritized groups."""
    j = j.start_subjob([8, 2])
    for f in (f for f in files if not hasattr(f, 'is_ref')):
        f.is_ref = False
    files = remove_dupe_paths(files)
    logging.info("Getting matches. Scan type: %d", self.scan_type)
    matches = self._getmatches(files, j)
    logging.info('Found %d matches' % len(matches))
    j.set_progress(100, tr("Removing false matches"))
    # In removing what we call here "false matches", we first want to remove, if we scan by
    # folders, folder matches for which the parent is also in a match (they're "duplicated
    # duplicates" if you will). Then, we also don't want mixed file kinds if the option isn't
    # enabled, we want matches for which both files exist and, lastly, we don't want matches
    # with both files as ref.
    if self.scan_type == ScanType.Folders and matches:
        allpath = {m.first.path for m in matches}
        allpath |= {m.second.path for m in matches}
        sortedpaths = sorted(allpath)
        toremove = set()
        last_parent_path = sortedpaths[0]
        for p in sortedpaths[1:]:
            if p in last_parent_path:
                toremove.add(p)
            else:
                last_parent_path = p
        matches = [
            m for m in matches
            if m.first.path not in toremove or m.second.path not in toremove
        ]
    if not self.mix_file_kind:
        matches = [
            m for m in matches
            if get_file_ext(m.first.name) == get_file_ext(m.second.name)
        ]
    matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
    matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
    if self.ignore_list:
        j = j.start_subjob(2)
        iter_matches = j.iter_with_progress(
            matches, tr("Processed %d/%d matches against the ignore list")
        )
        matches = [
            m for m in iter_matches
            if not self.ignore_list.AreIgnored(str(m.first.path), str(m.second.path))
        ]
    logging.info('Grouping matches')
    groups = engine.get_groups(matches, j)
    matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
    if self.scan_type in {ScanType.Filename, ScanType.Fields, ScanType.FieldsNoOrder, ScanType.Tag}:
        self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
    else:
        # Ticket #195
        # To speed up the scan, we don't bother comparing contents of files that are both ref
        # files. However, this messes up "discarded" counting because there's a missing match
        # in cases where we end up with a dupe group anyway (with a non-ref file). Because it's
        # impossible to have discarded matches in exact dupe scans, we simply set it at 0, thus
        # bypassing our tricky problem.
        # Also, although ScanType.FuzzyBlock is not always doing exact comparisons, we also
        # bypass ref comparison, thus messing up with our "discarded" count. So we're
        # effectively disabling the "discarded" feature in PE, but it's better than falsely
        # reporting discarded matches.
        self.discarded_file_count = 0
    groups = [g for g in groups if any(not f.is_ref for f in g)]
    logging.info('Created %d groups' % len(groups))
    j.set_progress(100, tr("Doing group prioritization"))
    for g in groups:
        g.prioritize(self._key_func, self._tie_breaker)
    return groups
def _refresh(self):
    """Rebuild the profit sheet: income/expense nodes plus a NET INCOME total."""
    self.clear()
    self.has_multiple_currencies = self.document.accounts.has_multiple_currencies()
    self.income = self.make_type_node(tr('INCOME'), AccountType.Income)
    self.expenses = self.make_type_node(tr('EXPENSES'), AccountType.Expense)
    self.net_income = self._make_node(tr('NET INCOME'))
    net_income = self.income.cash_flow_amount - self.expenses.cash_flow_amount
    last_net_income = self.income.last_cash_flow_amount - self.expenses.last_cash_flow_amount
    net_budgeted = self.income.budgeted_amount - self.expenses.budgeted_amount
    delta = net_income - last_net_income
    explicit = self.has_multiple_currencies
    fmt = lambda amount: self.document.format_amount(amount, force_explicit_currency=explicit)
    self.net_income.cash_flow = fmt(net_income)
    self.net_income.last_cash_flow = fmt(last_net_income)
    self.net_income.budgeted = fmt(net_budgeted)
    self.net_income.delta = fmt(delta)
    self.net_income.delta_perc = get_delta_perc(delta, last_net_income)
    self.net_income.is_total = True
    self.append(self.income)
    self.append(self.expenses)
    self.append(self.net_income)
def __init__(self, ref, repeat_type, repeat_every):
    """Create a recurrence spawning ``ref`` every ``repeat_every`` periods of ``repeat_type``."""
    if repeat_type not in RTYPE2INCFUNC:
        # invalid repeat type, default to monthly
        repeat_type = RepeatType.Monthly
    #: :class:`.Transaction`. The model transaction that's going to be regularly spawned.
    self.ref = ref
    self._repeat_type = repeat_type
    self._repeat_every = repeat_every
    #: Date at which our recurrence stops. When ``None`` (the default), it never ends.
    self.stop_date = None
    #: ``recurrent_date -> transaction`` mapping of schedule exceptions.
    self.date2exception = {}
    #: ``recurrent_date -> transaction`` mapping of *global* schedule exceptions.
    self.date2globalchange = {}
    #: ``recurrent_date -> transaction`` mapping of spawns. Used as a cache. Frequently purged.
    self.date2instances = {}
    self.rtype2desc = {
        RepeatType.Daily: tr('Daily'),
        RepeatType.Weekly: tr('Weekly'),
        RepeatType.Monthly: tr('Monthly'),
        RepeatType.Yearly: tr('Yearly'),
        RepeatType.Weekday: '',  # dynamic
        RepeatType.WeekdayLast: '',  # dynamic
    }
    self._update_rtype_descs()
def change_schedule(self, schedule, new_ref, repeat_type, repeat_every, stop_date):
    """Apply edited values from ``new_ref`` and new repeat settings to ``schedule``."""
    for split in new_ref.splits:
        if split.account is not None:
            # same as in change_transaction()
            split.account = self.accounts.find(split.account.name, split.account.type)
    is_existing = schedule in self.schedules
    if is_existing:
        action = Action(tr('Change Schedule'))
        action.change_schedule(schedule)
    else:
        action = Action(tr('Add Schedule'))
        action.added_schedules.add(schedule)
    self._undoer.record(action)
    original = schedule.ref
    min_date = min(original.date, new_ref.date)
    original.set_splits(new_ref.splits)
    original.change(
        description=new_ref.description,
        payee=new_ref.payee,
        checkno=new_ref.checkno,
        notes=new_ref.notes,
    )
    schedule.start_date = new_ref.date
    schedule.repeat_type = repeat_type
    schedule.repeat_every = repeat_every
    schedule.stop_date = stop_date
    schedule.reset_spawn_cache()
    if not is_existing:
        self.schedules.append(schedule)
    self._cook(from_date=min_date)
    self.notify('schedule_changed')
def _refresh(self):
    """Recompute the net-worth sheet nodes and totals."""
    self.clear()
    accounts = self.document.accounts
    self.has_multiple_currencies = accounts.has_multiple_currencies()
    self.assets = self.make_type_node(tr('ASSETS'), AccountType.Asset)
    self.liabilities = self.make_type_node(tr('LIABILITIES'), AccountType.Liability)
    self.net_worth = self._make_node(tr('NET WORTH'))
    net_worth_start = self.assets.start_amount - self.liabilities.start_amount
    net_worth_end = self.assets.end_amount - self.liabilities.end_amount
    budget_date_range = DateRange(date.today(), self.document.date_range.end)
    # The net worth's budget is not a simple subtraction, it must count the target-less budgets
    net_worth_budgeted = self.document.budgeted_amount_for_target(None, budget_date_range)
    net_worth_delta = net_worth_end - net_worth_start
    force_explicit_currency = self.has_multiple_currencies
    self.net_worth.start = self.document.format_amount(
        net_worth_start, force_explicit_currency=force_explicit_currency
    )
    self.net_worth.end = self.document.format_amount(
        net_worth_end, force_explicit_currency=force_explicit_currency
    )
    self.net_worth.budgeted = self.document.format_amount(
        net_worth_budgeted, force_explicit_currency=force_explicit_currency
    )
    self.net_worth.delta = self.document.format_amount(
        net_worth_delta, force_explicit_currency=force_explicit_currency
    )
    self.net_worth.delta_perc = get_delta_perc(net_worth_delta, net_worth_start)
    self.net_worth.is_total = True
    self.append(self.assets)
    self.append(self.liabilities)
    self.append(self.net_worth)
def getmatches_by_contents(files, sizeattr='size', partial=False, j=job.nulljob):
    """Returns a list of :class:`Match` within ``files`` if their contents is the same.

    :param str sizeattr: attibute name of the :class:`~core.fs.file` that returns the
                         size of the file to use for comparison.
    :param bool partial: if true, will use the "md5partial" attribute instead of "md5"
                         to compute contents hash.
    :param j: A :ref:`job progress instance <jobs>`.
    """
    j = j.start_subjob([2, 8])
    # Only files sharing a size can possibly be content duplicates.
    size2files = defaultdict(set)
    for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
        fsize = getattr(f, sizeattr)
        if fsize:
            size2files[fsize].add(f)
    possible_matches = [bucket for bucket in size2files.values() if len(bucket) > 1]
    del size2files
    result = []
    j.start_job(len(possible_matches), tr("0 matches found"))
    for group in possible_matches:
        for first, second in itertools.combinations(group, 2):
            if first.is_ref and second.is_ref:
                continue  # Don't spend time comparing two ref pics together.
            if first.md5partial == second.md5partial:
                if partial or first.md5 == second.md5:
                    result.append(Match(first, second, 100))
        j.add_progress(desc=tr("%d matches found") % len(result))
    return result
def _job_completed(self, jobid):
    """Dispatch post-job UI updates based on which async job just finished."""
    if jobid == JobType.Scan:
        self._results_changed()
        if self.results.groups:
            self.view.show_results_window()
        else:
            self.view.show_message(tr("No duplicates found."))
    if jobid in {JobType.Move, JobType.Delete}:
        self._results_changed()
    if jobid == JobType.Load:
        self._recreate_result_table()
        self._results_changed()
        self.view.show_results_window()
    if jobid in {JobType.Copy, JobType.Move, JobType.Delete}:
        if self.results.problems:
            self.problem_dialog.refresh()
            self.view.show_problem_dialog()
        else:
            success_messages = {
                JobType.Copy: tr("All marked files were copied successfully."),
                JobType.Move: tr("All marked files were moved successfully."),
                JobType.Delete: tr("All marked files were successfully sent to Trash."),
            }
            self.view.show_message(success_messages[jobid])
def _refresh(self):
    """Recompute the profit sheet nodes and the NET INCOME total row."""
    self.clear()
    self.has_multiple_currencies = self.document.accounts.has_multiple_currencies()
    self.income = self.make_type_node(tr('INCOME'), AccountType.Income)
    self.expenses = self.make_type_node(tr('EXPENSES'), AccountType.Expense)
    self.net_income = self._make_node(tr('NET INCOME'))
    net_income = self.income.cash_flow_amount - self.expenses.cash_flow_amount
    last_net_income = self.income.last_cash_flow_amount - self.expenses.last_cash_flow_amount
    net_budgeted = self.income.budgeted_amount - self.expenses.budgeted_amount
    delta = net_income - last_net_income
    force_explicit_currency = self.has_multiple_currencies
    self.net_income.cash_flow = self.document.format_amount(
        net_income, force_explicit_currency=force_explicit_currency
    )
    self.net_income.last_cash_flow = self.document.format_amount(
        last_net_income, force_explicit_currency=force_explicit_currency
    )
    self.net_income.budgeted = self.document.format_amount(
        net_budgeted, force_explicit_currency=force_explicit_currency
    )
    self.net_income.delta = self.document.format_amount(
        delta, force_explicit_currency=force_explicit_currency
    )
    self.net_income.delta_perc = get_delta_perc(delta, last_net_income)
    self.net_income.is_total = True
    self.append(self.income)
    self.append(self.expenses)
    self.append(self.net_income)
def getmatches_by_contents(files, sizeattr="size", partial=False, j=job.nulljob):
    """Returns a list of :class:`Match` within ``files`` if their contents is the same.

    :param str sizeattr: attibute name of the :class:`~core.fs.file` that returns the
                         size of the file to use for comparison.
    :param bool partial: if true, will use the "md5partial" attribute instead of "md5"
                         to compute contents hash.
    :param j: A :ref:`job progress instance <jobs>`.
    """
    j = j.start_subjob([2, 8])
    size2files = defaultdict(set)
    for candidate in j.iter_with_progress(files, tr("Read size of %d/%d files")):
        candidate_size = getattr(candidate, sizeattr)
        if candidate_size:
            size2files[candidate_size].add(candidate)
    # Keep only size buckets that contain at least two files.
    possible_matches = [group for group in size2files.values() if len(group) > 1]
    del size2files
    result = []
    j.start_job(len(possible_matches), tr("0 matches found"))
    for group in possible_matches:
        for first, second in itertools.combinations(group, 2):
            if first.is_ref and second.is_ref:
                # Don't spend time comparing two ref pics together.
                continue
            if first.md5partial == second.md5partial:
                if partial or first.md5 == second.md5:
                    result.append(Match(first, second, 100))
        j.add_progress(desc=tr("%d matches found") % len(result))
    return result
def get_scan_options():
    """Return the list of scan options supported by this edition."""
    option_specs = [
        (ScanType.Filename, tr("Filename")),
        (ScanType.Fields, tr("Filename - Fields")),
        (ScanType.FieldsNoOrder, tr("Filename - Fields (No Order)")),
        (ScanType.Tag, tr("Tags")),
        (ScanType.Contents, tr("Contents")),
    ]
    return [ScanOption(scan_type, label) for scan_type, label in option_specs]
def add_directory(self, d):
    """Add path ``d`` to the directory list, reporting problems via the view."""
    try:
        self.directories.add_path(Path(d))
    except directories.AlreadyThereError:
        self.view.show_message(tr("'{}' already is in the list.").format(d))
    except directories.InvalidPathError:
        self.view.show_message(tr("'{}' does not exist.").format(d))
    else:
        self.notify('directories_changed')
def get_scan_options():
    """Return the scan options supported by this edition."""
    specs = [
        (ScanType.FILENAME, tr("Filename")),
        (ScanType.FIELDS, tr("Filename - Fields")),
        (ScanType.FIELDSNOORDER, tr("Filename - Fields (No Order)")),
        (ScanType.TAG, tr("Tags")),
        (ScanType.CONTENTS, tr("Contents")),
    ]
    return [ScanOption(scan_type, label) for scan_type, label in specs]
def new_group(self, type):
    """Create, record (for undo) and return a new account group of ``type``."""
    name = self.groups.new_name(tr('New group'), type)
    group = Group(name, type)
    action = Action(tr('Add group'))
    action.added_groups.add(group)
    self._undoer.record(action)
    self.groups.append(group)
    self.notify('account_added')
    return group
def getmatches(
    objects, min_match_percentage=0, match_similar_words=False,
    weight_words=False, no_field_order=False, j=job.nulljob
):
    """Returns a list of :class:`Match` within ``objects`` after fuzzily matching their words.

    :param objects: List of :class:`~core.fs.File` to match.
    :param int min_match_percentage: minimum % of words that have to match.
    :param bool match_similar_words: make similar words (see :func:`merge_similar_words`) match.
    :param bool weight_words: longer words are worth more in match % computations.
    :param bool no_field_order: match :ref:`fields` regardless of their order.
    :param j: A :ref:`job progress instance <jobs>`.
    """
    COMMON_WORD_THRESHOLD = 50
    LIMIT = 5000000
    j = j.start_subjob(2)
    sj = j.start_subjob(2)
    for obj in objects:
        if not hasattr(obj, "words"):
            obj.words = getwords(obj.name)
    word_dict = build_word_dict(objects, sj)
    reduce_common_words(word_dict, COMMON_WORD_THRESHOLD)
    if match_similar_words:
        merge_similar_words(word_dict)
    match_flags = []
    if weight_words:
        match_flags.append(WEIGHT_WORDS)
    if match_similar_words:
        match_flags.append(MATCH_SIMILAR_WORDS)
    if no_field_order:
        match_flags.append(NO_FIELD_ORDER)
    j.start_job(len(word_dict), tr("0 matches found"))
    compared = defaultdict(set)
    result = []
    try:
        # Words are popped off one at a time so that peak memory stays low.
        while word_dict:
            candidates = word_dict.popitem()[1]
            while candidates:
                ref = candidates.pop()
                seen = compared[ref]
                to_compare = candidates - seen
                seen |= to_compare
                for other in to_compare:
                    m = get_match(ref, other, match_flags)
                    if m.percentage >= min_match_percentage:
                        result.append(m)
                        if len(result) >= LIMIT:
                            return result
            j.add_progress(desc=tr("%d matches found") % len(result))
    except MemoryError:
        # Memory usage peaks right here; carry on with the partial match list.
        del compared  # This should give us enough room to call logging.
        logging.warning("Memory Overflow. Matches: %d. Word dict: %d" % (len(result), len(word_dict)))
        return result
    return result
def new_account(self, type, group):
    """Create, record (for undo) and return a new account in ``group``."""
    name = self.accounts.new_name(tr('New account'))
    account = Account(name, self.default_currency, type)
    account.group = group
    action = Action(tr('Add account'))
    action.added_accounts.add(account)
    self._undoer.record(action)
    self.accounts.add(account)
    self.notify('account_added')
    return account
def export_to_csv(self):
    """Export current results to CSV.

    The columns and their order in the resulting CSV file is determined in the
    same way as in :meth:`export_to_xhtml`.
    """
    prompt = tr("Select a destination for your exported CSV")
    dest_file = self.view.select_dest_file(prompt, 'csv')
    if not dest_file:
        return
    colnames, rows = self._get_export_data()
    try:
        export.export_to_csv(dest_file, colnames, rows)
    except OSError as e:
        self.view.show_message(tr("Couldn't write to file: {}").format(str(e)))
def _get_action_from_changed_transactions(self, transactions, global_scope=False):
    """Build the undo Action describing a change (or addition) of ``transactions``."""
    is_new_single = (
        len(transactions) == 1
        and not isinstance(transactions[0], Spawn)
        and transactions[0] not in self.transactions
    )
    if is_new_single:
        action = Action(tr('Add transaction'))
        action.added_transactions.add(transactions[0])
    else:
        action = Action(tr('Change transaction'))
        action.change_transactions(transactions)
    if global_scope:
        # Global changes to spawns affect their whole schedule.
        spawns, txns = extract(lambda x: isinstance(x, Spawn), transactions)
        for schedule in {spawn.recurrence for spawn in spawns}:
            action.change_schedule(schedule)
    return action
def add_directory(self, d):
    """Adds folder ``d`` to :attr:`directories`.

    Shows an error message dialog if something bad happens.

    :param str d: path of folder to add
    """
    try:
        self.directories.add_path(Path(d))
    except directories.AlreadyThereError:
        self.view.show_message(tr("'{}' already is in the list.").format(d))
    except directories.InvalidPathError:
        self.view.show_message(tr("'{}' does not exist.").format(d))
    else:
        self.notify('directories_changed')
def add_directory(self, d):
    """Adds folder ``d`` to :attr:`directories`.

    Shows an error message dialog if something bad happens.

    :param str d: path of folder to add
    """
    try:
        self.directories.add_path(Path(d))
    except directories.AlreadyThereError:
        self.view.show_message(tr("'{}' already is in the list.").format(d))
    except directories.InvalidPathError:
        self.view.show_message(tr("'{}' does not exist.").format(d))
    else:
        self.notify("directories_changed")
def export_to_csv(self):
    """Export current results to CSV.

    The columns and their order in the resulting CSV file is determined in the
    same way as in :meth:`export_to_xhtml`.
    """
    dest_file = self.view.select_dest_file(
        tr("Select a destination for your exported CSV"), 'csv'
    )
    if dest_file:
        colnames, rows = self._get_export_data()
        try:
            export.export_to_csv(dest_file, colnames, rows)
        except OSError as e:
            msg = tr("Couldn't write to file: {}").format(str(e))
            self.view.show_message(msg)
def _job_error(self, jobid, err):
    """Handle a job error: report load failures, re-raise everything else."""
    if jobid != JobType.Load:
        raise err
    self.view.show_message(tr("Could not load file: {}").format(err))
    return False
def __init__(self, document):
    """Set up the import window: panes, swap-type list and import table."""
    DocumentGUIObject.__init__(self, document)
    self._selected_pane_index = 0
    self._selected_target_index = 0

    def on_swap_type_selected(index):
        # Keep the swap button state in sync with the selected swap type.
        self.view.set_swap_button_enabled(self.can_perform_swap())

    self.swap_type_list = LinkedSelectableList(
        items=[
            "<placeholder> Day <--> Month",
            "<placeholder> Month <--> Year",
            "<placeholder> Day <--> Year",
            tr("Description <--> Payee"),
            tr("Invert Amounts"),
        ],
        setfunc=on_swap_type_selected,
    )
    self.swap_type_list.selected_index = SwapType.DayMonth
    self.panes = []
    self.import_table = ImportTable(self)
def __init__(self, table, account, date, total_debit, total_credit):
    """Build the bold TOTAL row displayed at the bottom of the ledger table."""
    super(TotalRow, self).__init__(table, account)
    self._date = date
    self._description = tr('TOTAL')
    # don't touch _increase and _decrease, they trigger editing.
    self._debit_fmt = table.document.format_amount(total_debit, blank_zero=True)
    self._credit_fmt = table.document.format_amount(total_credit, blank_zero=True)
    net = total_debit - total_credit
    if not net:
        self._balance_fmt = ''
    else:
        if account.is_credit_account():
            net *= -1
        sign = '+' if net > 0 else '-'
        # format_amount doesn't explicitly put positive signs, so we have to put it ourselves.
        # However, if the delta is of foreign currency, we want the sign to be in front of the
        # amount, not in front of the currency code.
        net_fmt = table.document.format_amount(abs(net))
        if net_fmt[0].isdigit():
            net_fmt = sign + net_fmt
        else:
            # we have a currency code in front of our amount, a little trick is to replace the
            # only space character we have by space + sign
            net_fmt = net_fmt.replace(' ', ' ' + sign)
        self._balance_fmt = net_fmt
    self.is_bold = True
def _load(self):
    """Turn parsed CSV lines into transactions using the configured column mapping."""
    lines = self.lines[:]
    colcount = len(lines[0]) if lines else 0
    columns = self.columns[:colcount]
    self._merge_columns(columns, lines)
    # Map each assigned CSV field to its column index.
    ci = {field: index for index, field in enumerate(columns) if field is not None}
    hasdate = CsvField.Date in ci
    hasamount = (CsvField.Amount in ci) or (CsvField.Increase in ci and CsvField.Decrease in ci)
    if not (hasdate and hasamount):
        raise FileLoadError(tr("The Date and Amount columns must be set."))
    self.account_info.name = 'CSV Import'
    self.parsing_date_format, lines_to_load = self._parse_date_format(lines, ci)
    self._check_amount_values(lines_to_load, ci)
    for line in lines_to_load:
        self.start_transaction()
        for attr, index in ci.items():
            value = line[index]
            if attr == CsvField.Date:
                value = self.parse_date_str(value, self.parsing_date_format)
            elif attr == CsvField.Increase:
                attr = CsvField.Amount
            elif attr == CsvField.Decrease:
                # Decrease columns become negative amounts.
                attr = CsvField.Amount
                if value.strip() and not value.startswith('-'):
                    value = '-' + value
            if isinstance(value, str):
                value = value.strip()
            if value:
                setattr(self.transaction_info, attr, value)
def toggle_entries_reconciled(self, entries):
    """Toggle the reconcile flag of `entries`.

    If every entry is already reconciled, they're all un-reconciled;
    otherwise they're all reconciled. Spawned (recurrent) entries are
    materialized into concrete transactions when reconciling.
    """
    if not entries:
        return
    # NOTE: the ``not entries or`` guard that used to prefix this expression
    # was dead code after the early return above; removed.
    all_reconciled = all(entry.reconciled for entry in entries)
    newvalue = not all_reconciled
    action = Action(tr('Change reconciliation'))
    action.change_splits([e.split for e in entries])
    min_date = min(entry.date for entry in entries)
    splits = [entry.split for entry in entries]
    spawns, splits = extract(lambda s: isinstance(s.transaction, Spawn), splits)
    for spawn in spawns:
        action.change_schedule(spawn.transaction.recurrence)
    self._undoer.record(action)
    if newvalue:
        for split in splits:
            split.reconciliation_date = split.transaction.date
        for spawn in spawns:
            # XXX update transaction selection
            materialized_split = self._reconcile_spawn_split(spawn, spawn.transaction.date)
            action.added_transactions.add(materialized_split.transaction)
    else:
        for split in splits:
            split.reconciliation_date = None
    self._cook(from_date=min_date)
    self.notify('transaction_changed')
def parse_file_for_import(self, filename):
    """Parses ``filename`` in preparation for importing.

    Tries each supported loader in turn (native moneyGuru, OFX, QIF, then
    CSV) until one accepts the file. CSV files need extra column
    configuration, so they open the CSV options window; every other format
    goes straight to :meth:`load_parsed_file_for_import`.

    Raises ``FileFormatError`` when no loader recognizes the file.
    """
    sys_date_format = DateFormat(self.app.date_format).sys_format
    parsed = None
    for loader_class in (native.Loader, ofx.Loader, qif.Loader, csv.Loader):
        try:
            candidate = loader_class(self.document.default_currency, default_date_format=sys_date_format)
            candidate.parse(filename)
        except FileFormatError:
            # Wrong format for this loader — try the next one.
            continue
        parsed = candidate
        break
    if parsed is None:
        raise FileFormatError(tr('%s is of an unknown format.') % filename)
    self.loader = parsed
    if isinstance(self.loader, csv.Loader):
        # CSV needs the user to map columns before the import proceeds.
        self.csv_options.show()
    else:
        self.load_parsed_file_for_import()
def load_from_xml(self, filename):
    """Replace this document's content with the moneyGuru file at ``filename``.

    Clears the current document, then copies properties, groups, accounts,
    transactions, schedules and budgets from the parsed file, cooks the
    result and resets the undo save point.

    Raises ``FileFormatError`` when the file is not a moneyGuru file.
    """
    loader = native.Loader(self.default_currency)
    try:
        loader.parse(filename)
    except FileFormatError:
        raise FileFormatError(tr('"%s" is not a moneyGuru file') % filename)
    loader.load()
    self._clear()
    self._document_id = loader.document_id
    # Only properties the document already knows about are copied over.
    for key, value in loader.properties.items():
        if key in self._properties:
            self._properties[key] = value
    for grp in loader.groups:
        self.groups.append(grp)
    for acct in loader.accounts:
        self.accounts.add(acct)
    for txn in loader.transactions:
        # Preserve each transaction's original ordering position.
        self.transactions.add(txn, position=txn.position)
    for rec in loader.schedules:
        self.schedules.append(rec)
    for bdg in loader.budgets:
        self.budgets.append(bdg)
    self.accounts.default_currency = self.default_currency
    self._cook()
    self._restore_preferences_after_load()
    self.notify('document_changed')
    # A freshly loaded document is unmodified: anchor the undo save point here.
    self._undoer.set_save_point()
    self._refresh_date_range()
def _refresh_totals(self):
    """Refresh the status line with selection/total counts and debit/credit sums."""
    selected, total, debit, credit = self.gltable.get_totals()
    fmt = self.document.format_amount
    self.status_line = tr("{0} out of {1} selected. Debit: {2} Credit: {3}").format(
        selected, total, fmt(debit), fmt(credit))
def invoke_custom_command(self):
    """Calls command in ``CustomCommand`` pref with ``%d`` and ``%r`` placeholders replaced.

    Using the current selection, ``%d`` is replaced with the currently selected dupe and
    ``%r`` is replaced with that dupe's ref file. If there's no selection, the command is
    not invoked. If the dupe is a ref, ``%d`` and ``%r`` will be the same.
    """
    cmd = self.view.get_default('CustomCommand')
    if not cmd:
        msg = tr("You have no custom command set up. Set it up in your preferences.")
        self.view.show_message(msg)
        return
    if not self.selected_dupes:
        return
    dupe = self.selected_dupes[0]
    group = self.results.get_group_of_duplicate(dupe)
    ref = group.ref
    # Substitute the placeholders with the dupe's path and its group's ref path.
    cmd = cmd.replace('%d', str(dupe.path))
    cmd = cmd.replace('%r', str(ref.path))
    # Detect a double-quoted executable path at the start of the command.
    match = re.match(r'"([^"]+)"(.*)', cmd)
    if match is not None:
        # This code here is because subprocess.Popen doesn't seem to accept, under Windows,
        # executable paths with spaces in it, *even* when they're enclosed in "". So this is
        # a workaround to make the damn thing work: run the bare executable name with
        # cwd set to its directory.
        exepath, args = match.groups()
        path, exename = op.split(exepath)
        # NOTE(review): shell=True with an interpolated string — acceptable here
        # since the command template comes from the user's own preferences.
        subprocess.Popen(exename + args, shell=True, cwd=path)
    else:
        subprocess.Popen(cmd, shell=True)
def _start_job(self, jobid, func, args=()):
    """Run ``func`` as an async job through the progress window.

    :param jobid: job identifier, also used to look up the window title in
        ``JOBID2TITLE``.
    :param func: callable taking a job handle as its first argument.
    :param args: extra positional arguments forwarded to ``func``.
    """
    title = JOBID2TITLE[jobid]
    try:
        self.progress_window.run(jobid, title, func, args=args)
    except job.JobInProgressError:
        # Only one job may run at a time; ask the user to retry shortly.
        msg = tr("A previous action is still hanging in there. You can't start a new one yet. Wait a few seconds, then try again.")
        self.view.show_message(msg)
def delete_accounts(self, accounts, reassign_to=None):
    """Remove ``accounts`` from the document.

    Transactions, schedules and budgets touching the removed accounts are
    reassigned to ``reassign_to`` (or have the affected budgets removed when
    ``reassign_to`` is ``None``). The whole operation is recorded as a single
    undoable action.

    :param accounts: iterable of accounts to remove.
    :param reassign_to: account that inherits the removed accounts' entries,
        or ``None`` to drop them.
    """
    def budgets_of(account):
        # Budgets referencing `account` either as source or as target.
        return [b for b in self.budgets if b.account is account or b.target is account]

    action = Action(tr('Remove account'))
    accounts = set(accounts)
    action.delete_accounts(accounts)
    affected_schedules = [s for s in self.schedules if accounts & s.affected_accounts()]
    for schedule in affected_schedules:
        action.change_schedule(schedule)
    for account in accounts:
        affected_budgets = budgets_of(account)
        if account.is_income_statement_account() and reassign_to is None:
            action.deleted_budgets |= set(affected_budgets)
        else:
            for budget in affected_budgets:
                action.change_budget(budget)
    # Record the undo action before mutating anything.
    self._undoer.record(action)
    for account in accounts:
        self.transactions.reassign_account(account, reassign_to)
        for schedule in affected_schedules:
            schedule.reassign_account(account, reassign_to)
        # BUGFIX: recompute the budgets affected by *this* account. Previously
        # this loop reused the `affected_budgets` binding left over from the
        # last iteration of the earlier loop, so only the last account's
        # budgets were reassigned or removed.
        for budget in budgets_of(account):
            if budget.account is account:
                if reassign_to is None:
                    self.budgets.remove(budget)
                else:
                    budget.account = reassign_to
            elif budget.target is account:
                budget.target = reassign_to
        self.accounts.remove(account)
    self._cook()
    self.notify('account_deleted')
def invoke_custom_command(self):
    """Calls command in ``CustomCommand`` pref with ``%d`` and ``%r`` placeholders replaced.

    Using the current selection, ``%d`` is replaced with the currently selected dupe and
    ``%r`` is replaced with that dupe's ref file. If there's no selection, the command is
    not invoked. If the dupe is a ref, ``%d`` and ``%r`` will be the same.
    """
    template = self.view.get_default('CustomCommand')
    if not template:
        self.view.show_message(tr("You have no custom command set up. Set it up in your preferences."))
        return
    if not self.selected_dupes:
        return
    selected = self.selected_dupes[0]
    ref_file = self.results.get_group_of_duplicate(selected).ref
    command = template.replace('%d', str(selected.path)).replace('%r', str(ref_file.path))
    quoted = re.match(r'"([^"]+)"(.*)', command)
    if quoted is None:
        subprocess.Popen(command, shell=True)
    else:
        # Under Windows, subprocess.Popen rejects quoted executable paths that
        # contain spaces, even inside "". Work around it by splitting off the
        # directory and running the bare executable name from there.
        exe_path, trailing = quoted.groups()
        exe_dir, exe_name = op.split(exe_path)
        subprocess.Popen(exe_name + trailing, shell=True, cwd=exe_dir)
def split_values(self, row_index, split_row_index):
    """Return the SplitValues (account name, memo, formatted amount) for one split."""
    target = self._get_splits_at_row(row_index)[split_row_index]
    if target.account is None:
        # Splits without an account are shown under a generic label.
        name = tr('Unassigned')
    else:
        name = target.account.name
    return SplitValues(name, target.memo, self.document.format_amount(target.amount))
def start_scanning(self):
    """Starts an async job to scan for duplicates.

    Scans folders selected in :attr:`directories` and put the results in
    :attr:`results`. Does nothing (besides showing a message) when the
    selected directories contain no scannable file.
    """
    scanner = self.SCANNER_CLASS()
    if not self.directories.has_any_file():
        self.view.show_message(tr("The selected directories contain no scannable file."))
        return
    # Send relevant options down to the scanner instance
    for k, v in self.options.items():
        if hasattr(scanner, k):
            setattr(scanner, k, v)
    self.results.groups = []
    self._results_changed()

    def do(j):
        j.set_progress(0, tr("Collecting files to scan"))
        # CONSISTENCY FIX: use the upper-case enum members used elsewhere in
        # this file — get_dupe_groups compares against ScanType.FOLDERS and
        # _job_completed compares against JobType.SCAN; the previous
        # ScanType.Folders / JobType.Scan spelling never matched them.
        if scanner.scan_type == ScanType.FOLDERS:
            files = list(self.directories.get_folders(folderclass=self.folderclass, j=j))
        else:
            files = list(self.directories.get_files(fileclasses=self.fileclasses, j=j))
        if self.options['ignore_hardlink_matches']:
            files = self._remove_hardlink_dupes(files)
        logging.info('Scanning %d files' % len(files))
        self.results.groups = scanner.get_dupe_groups(files, self.ignore_list, j)
        self.discarded_file_count = scanner.discarded_file_count

    self._start_job(JobType.SCAN, do)
def clear(self):
    """Remove all items from the ignore list, after asking the user to confirm."""
    # Nothing to do on an empty list; skip the confirmation prompt entirely.
    if not self.ignore_list:
        return
    msg = tr("Do you really want to remove all %d items from the ignore list?") % len(self.ignore_list)
    if self.app.view.ask_yes_no(msg):
        self.ignore_list.Clear()
        self.refresh()
def show(self):
    """Reset to the default layout, refresh every panel component, then show the view."""
    default = Layout(tr('Default'))
    self._default_layout = default
    self.layout = default
    # Refresh all derived widgets before the window becomes visible.
    for refresh in (self._refresh_columns, self._refresh_lines, self._refresh_targets):
        refresh()
    self.view.refresh_layout_menu()
    self.view.show()
def _check_demo(self):
    """Return False (after warning the user) when demo limits forbid the operation.

    In demo mode, at most 10 marked duplicates may be acted upon at once.
    """
    over_limit = self.should_apply_demo_limitation and self.results.mark_count > 10
    if not over_limit:
        return True
    self.view.show_message(tr("You cannot delete, move or copy more than 10 duplicates at once in demo mode."))
    return False
class SwapDescriptionPayeeAction(BaseSwapFields):
    """Import action that exchanges each transaction's description and payee fields."""
    NAME = "Swap Description Payee Import Action"
    ACTION_NAME = tr("Description <--> Payee")
    PRIORITY = 4

    def _switch_function(self, txn):
        # Tuple assignment swaps the two fields in one step.
        txn.payee, txn.description = txn.description, txn.payee
def _refresh_totals(self):
    """Refresh the status line with the selection count and its converted total amount."""
    chosen = self.mainwindow.selected_transactions
    native_currency = self.document.default_currency
    # Convert each selected transaction's amount to the document currency before summing.
    converted_total = sum(convert_amount(t.amount, native_currency, t.date) for t in chosen)
    self.status_line = tr("{0} out of {1} selected. Amount: {2}").format(
        len(chosen), len(self.visible_transactions), self.document.format_amount(converted_total))
def get_dupe_groups(self, files, ignore_list=None, j=job.nulljob):
    """Scan ``files`` for duplicates and return the resulting list of dupe groups.

    :param files: file objects to scan; any object missing an ``is_ref``
        attribute gets one set to False.
    :param ignore_list: optional list of file pairs to exclude from results.
    :param j: job handle used for progress reporting.
    """
    # Ensure every file has an is_ref flag before matching starts.
    for f in (f for f in files if not hasattr(f, "is_ref")):
        f.is_ref = False
    files = remove_dupe_paths(files)
    logging.info("Getting matches. Scan type: %d", self.scan_type)
    matches = self._getmatches(files, j)
    logging.info("Found %d matches" % len(matches))
    j.set_progress(100, tr("Almost done! Fiddling with results..."))
    # Post-filtering of "false matches": (1) in folder scans, drop folder
    # matches whose parent folder is also matched (they'd be duplicated
    # duplicates); (2) drop mixed file kinds unless that option is enabled;
    # (3) drop matches whose files no longer exist; (4) drop matches where
    # both files are refs; (5) drop pairs present in the ignore list.
    if self.scan_type == ScanType.FOLDERS and matches:
        allpath = {m.first.path for m in matches}
        allpath |= {m.second.path for m in matches}
        # Sorting puts each parent path right before its descendants.
        sortedpaths = sorted(allpath)
        toremove = set()
        last_parent_path = sortedpaths[0]
        for p in sortedpaths[1:]:
            # NOTE(review): relies on the project Path type's __contains__
            # meaning "p is inside last_parent_path" — confirm against the
            # path class; plain substring semantics would be wrong here.
            if p in last_parent_path:
                toremove.add(p)
            else:
                last_parent_path = p
        # NOTE(review): `or` keeps a match unless BOTH sides are dominated by
        # an already-matched parent — verify that this (rather than `and`) is
        # the intended semantics.
        matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove]
    if not self.mix_file_kind:
        matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
    matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
    matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
    if ignore_list:
        matches = [m for m in matches if not ignore_list.are_ignored(str(m.first.path), str(m.second.path))]
    logging.info("Grouping matches")
    groups = engine.get_groups(matches)
    if self.scan_type in {
        ScanType.FILENAME,
        ScanType.FIELDS,
        ScanType.FIELDSNOORDER,
        ScanType.TAG,
    }:
        # Discarded = matched files that didn't end up in any group.
        matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
        self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
    else:
        # Ticket #195
        # To speed up the scan, we don't bother comparing contents of files that are both ref
        # files. However, this messes up "discarded" counting because there's a missing match
        # in cases where we end up with a dupe group anyway (with a non-ref file). Because it's
        # impossible to have discarded matches in exact dupe scans, we simply set it at 0, thus
        # bypassing our tricky problem.
        # Also, although ScanType.FuzzyBlock is not always doing exact comparisons, we also
        # bypass ref comparison, thus messing up with our "discarded" count. So we're
        # effectively disabling the "discarded" feature in PE, but it's better than falsely
        # reporting discarded matches.
        self.discarded_file_count = 0
    # Groups made only of ref files are useless: nothing in them can be deleted.
    groups = [g for g in groups if any(not f.is_ref for f in g)]
    logging.info("Created %d groups" % len(groups))
    for g in groups:
        g.prioritize(self._key_func, self._tie_breaker)
    return groups
def reprioritize_groups(self, sort_key):
    """Re-prioritize every dupe group using ``sort_key`` and report how many changed."""
    changed = sum(1 for grp in self.results.groups if grp.prioritize(key_func=sort_key))
    self._results_changed()
    self.view.show_message(
        tr("{} duplicate groups were changed by the re-prioritization.").format(changed))
def _start_job(self, jobid, func, args=()):
    """Run ``func`` as an async job under ``jobid``'s title in the progress window."""
    try:
        self.progress_window.run(jobid, JOBID2TITLE[jobid], func, args=args)
    except job.JobInProgressError:
        # Only one job at a time may run; ask the user to retry shortly.
        self.view.show_message(tr(
            "A previous action is still hanging in there. You can't start a new one yet. Wait "
            "a few seconds, then try again."))
def clear(self):
    """Empty the ignore list after user confirmation; no-op when already empty."""
    item_count = len(self.ignore_list)
    if not item_count:
        return
    prompt = tr("Do you really want to remove all %d items from the ignore list?") % item_count
    if not self.app.view.ask_yes_no(prompt):
        return
    self.ignore_list.Clear()
    self.refresh()