def _reorder_following_line(self, reorder_lines):
    # Reordering from a line is a bit more complex than it seems. We have to find intersection
    # points from all lines in all rects (when there's an intersection, of course). Then, for
    # each intersection, we compute its distance from the origin of the order arrow. We sort
    # our elements by that distance and we have our new order!
    neworder = []
    for reorder_line in reorder_lines:
        neworder += self._get_intersections(reorder_line)
    neworder = dedupe(neworder)
    if len(neworder) < 2:
        return  # nothing to reorder
    # ok, we have our new order. That was easy, huh? Now we have to insert that new order
    # into the rest of the elements, which might not all be in our new order. That's a bit
    # complex too because when drawing arrows, a naive re-ordering is counter-intuitive.
    # It's hard to describe why we do it this way, but here's what we do: we compute the min
    # and max of the old orders in our elems. Whatever came before min stays before. Whatever
    # came after max stays *right* after, but not at the end. The end of the order is made up
    # of what lies between min and max but isn't in the new order. Arrow-wise, these elements
    # have been "orphaned" by the new link and the most intuitive thing to do with them is to
    # put them at the end of the order.
    minorder = min(e.order for e in neworder)
    maxorder = max(e.order for e in neworder)
    all_elems = list(self._ordered_elems())
    affected_elems = set(neworder)
    unaffected_elems = [e for e in all_elems if e not in affected_elems]
    before = [e for e in unaffected_elems if e.order < minorder]
    after = [e for e in unaffected_elems if e.order > maxorder]
    inbetween = [e for e in unaffected_elems if minorder < e.order < maxorder]
    # we also add ignored elems to our big concat so that their order value doesn't conflict
    ignored = list(self._ignored_elems())
    concat = before + neworder + after + inbetween + ignored
    for i, elem in enumerate(concat):
        elem.order = i

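# The reordering above relies on dedupe() keeping only the *first* occurrence of each
# element while preserving iteration order; otherwise the distance sort would be undone.
# A minimal sketch of such an order-preserving helper (an assumption about the semantics
# of the dedupe used throughout this code, not necessarily its actual implementation):
def dedupe(iterable):
    # Keep the first occurrence of each (hashable) item, in iteration order.
    seen = set()
    result = []
    for item in iterable:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result
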
def get_dupe_groups(self, files, j=job.nulljob):
    j = j.start_subjob([8, 2])
    for f in (f for f in files if not hasattr(f, 'is_ref')):
        f.is_ref = False
    files = remove_dupe_paths(files)
    logging.info("Getting matches. Scan type: %d", self.scan_type)
    matches = self._getmatches(files, j)
    logging.info('Found %d matches' % len(matches))
    j.set_progress(100, tr("Removing false matches"))
    # In removing what we call here "false matches", we want to: if we scan by folders,
    # remove folder matches for which the parent is also in a match (they're "duplicated
    # duplicates", if you will); drop mixed file kinds unless the option is enabled; keep
    # only matches for which both files exist; and, lastly, drop matches with both files
    # as ref.
    if self.scan_type == ScanType.Folders and matches:
        allpath = {m.first.path for m in matches}
        allpath |= {m.second.path for m in matches}
        sortedpaths = sorted(allpath)
        toremove = set()
        last_parent_path = sortedpaths[0]
        for p in sortedpaths[1:]:
            if p in last_parent_path:
                toremove.add(p)
            else:
                last_parent_path = p
        matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove]
    if not self.mix_file_kind:
        matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
    matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
    matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
    if self.ignore_list:
        j = j.start_subjob(2)
        iter_matches = j.iter_with_progress(matches, tr("Processed %d/%d matches against the ignore list"))
        matches = [
            m for m in iter_matches
            if not self.ignore_list.AreIgnored(str(m.first.path), str(m.second.path))
        ]
    logging.info('Grouping matches')
    groups = engine.get_groups(matches, j)
    matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
    if self.scan_type in {ScanType.Filename, ScanType.Fields, ScanType.FieldsNoOrder, ScanType.Tag}:
        self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
    else:
        # Ticket #195
        # To speed up the scan, we don't bother comparing contents of files that are both
        # ref files. However, this messes up "discarded" counting because there's a missing
        # match in cases where we end up with a dupe group anyway (with a non-ref file).
        # Because it's impossible to have discarded matches in exact dupe scans, we simply
        # set it to 0, thus bypassing our tricky problem.
        # Also, although ScanType.FuzzyBlock doesn't always do exact comparisons, we bypass
        # ref comparison for it too, which also messes up our "discarded" count. So we're
        # effectively disabling the "discarded" feature in PE, but it's better than falsely
        # reporting discarded matches.
        self.discarded_file_count = 0
    groups = [g for g in groups if any(not f.is_ref for f in g)]
    logging.info('Created %d groups' % len(groups))
    j.set_progress(100, tr("Doing group prioritization"))
    for g in groups:
        g.prioritize(self._key_func, self._tie_breaker)
    return groups

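# Why a single pass over sorted paths suffices for the folder pruning above: lexicographic
# sorting places a parent folder right before everything nested under it, so remembering
# the last path that was *not* contained in the current parent catches every child. A
# self-contained sketch of the same idea over plain strings (the real code uses Path
# objects whose `in` operator tests containment; here startswith() plays that role):
def prune_children(paths):
    # Keep only paths that don't live under an earlier (shorter) path in the sorted list.
    result = []
    last_parent = None
    for p in sorted(paths):
        if last_parent is not None and p.startswith(last_parent + '/'):
            continue  # p is inside a folder we already kept
        result.append(p)
        last_parent = p
    return result

# prune_children(['/a', '/a/b', '/a/b/c', '/d']) returns ['/a', '/d']
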
def __Perform(self, copy=False, job=nulljob):
    try:
        cd_operations = [t for t in self.name_list if t[0][0].startswith('!')]
        normal_operations = [t for t in self.name_list if t not in cd_operations]
        cds = dedupe(t[0][0][1:] for t in cd_operations)
        job_count = len(cds)
        if normal_operations:
            job_count += 1
        newjob = job.start_subjob(job_count)
        if normal_operations:
            self.__ProcessNormalList(normal_operations, copy, newjob)
        for cd in cds:
            cd_location = [location for location in self.cd_locations if location.name == cd][0]
            cd_path = self.OnNeedCD(cd_location)
            if not cd_path:
                return False
            name_list = [(t[0][1:], t[1]) for t in cd_operations if t[0][0][1:] == cd]
            if not self.__ProcessCDList(name_list, Path(cd_path), cd_location, newjob):
                return False
        return True
    except JobCancelled:
        return False

def _perform_action(self, import_action, apply=ActionSelectionOptions.ApplyToPane):
    if self.selected_pane is None:
        return
    action_params = self._collect_action_params(import_action, apply)
    if not action_params:
        return
    panes = dedupe(flatten(grp[2] for grp in action_params))
    for pane in panes:
        pane.match_flag = False
        pane.import_document.cook_flag = False
    for action_param in action_params:
        import_action.perform_action(*action_param)
    for pane in panes:
        if not pane.import_document.cook_flag:
            pane.import_document.cook()
    for pane in panes:
        if not pane.match_flag:
            pane.match_entries()
    self.import_table.refresh()

def guess_date_format(self, str_dates):
    totry = DATE_FORMATS[:]
    extra = []
    if self.NATIVE_DATE_FORMAT:
        extra.append(self.NATIVE_DATE_FORMAT)
    if self.EXTRA_DATE_FORMATS:
        extra += self.EXTRA_DATE_FORMATS
    if self.default_date_format:
        extra.append(self.default_date_format)
    for format in dedupe(extra + totry):
        found_at_least_one = False
        for str_date in str_dates:
            try:
                datetime.datetime.strptime(str_date, format)
                found_at_least_one = True
            except ValueError:
                logging.debug("Failed to read the date {0} with the format {1}".format(str_date, format))
                break
        else:
            if found_at_least_one:
                logging.debug("Correct date format: {0}".format(format))
                return format
    return None

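# Usage sketch with hypothetical inputs: the first format that parses *every* sample date
# wins; the inner for/else only reaches the `else` clause when no ValueError broke out of
# the loop. Assuming '%d/%m/%Y' is among DATE_FORMATS and `loader` is an instance of the
# class above:
#
#     loader.guess_date_format(['13/02/2015', '01/03/2015'])  # -> '%d/%m/%Y'
#
# '%m/%d/%Y' would be rejected because 13 is not a valid month for '13/02/2015'.
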
def mimeData(self, indexes):
    nodes = dedupe(index.internalPointer() for index in indexes)
    paths = [str(self.name + node.ref.path) for node in nodes]
    data = '\n'.join(paths).encode('utf-8')
    mimeData = QMimeData()
    mimeData.setData(MIME_PATHS, QByteArray(data))
    return mimeData

def _collect_action_params(self, import_action, apply=ActionSelectionOptions.ApplyToPane):
    if apply == ActionSelectionOptions.ApplyToSelection:
        if not self.selected_pane:
            return []
        panes = [self.selected_pane]
        selected_rows = self.import_table.selected_rows
        if not selected_rows:
            return []
        transactions = [row.imported.transaction for row in selected_rows if row.imported]
        transactions = dedupe(transactions)
        can_perform_action = import_action.can_perform_action(panes[0].import_document, transactions, panes, selected_rows)
        if not can_perform_action:
            return []
        return [(panes[0].import_document, transactions, panes, selected_rows)]
    if apply == ActionSelectionOptions.ApplyToAll:
        panes = self.panes.copy()
    else:
        panes = [self.selected_pane]
    results = []
    # Groups of panes which share the same import document
    pane_groups = unique_groups(panes, lambda p: p.import_document)
    selected_group = None
    for ps in pane_groups:
        transactions = [e.transaction for p in ps for e in p.account.entries]
        transactions = dedupe(transactions)
        can_perform_action = import_action.can_perform_action(ps[0].import_document, transactions, ps)
        if not can_perform_action:
            continue
        if self.selected_pane in ps:
            selected_group = (ps[0].import_document, transactions, ps)
        else:
            results.append((ps[0].import_document, transactions, ps))
    # We want to ensure that the selected pane group is the last to be called,
    # allowing the name of the plugin to update.
    if selected_group:
        results.append(selected_group)
    return results

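# unique_groups() isn't defined here; the logic above only needs it to bucket panes by a
# key while preserving pane order. A minimal sketch under that assumption (the project's
# actual helper may differ):
def unique_groups(items, keyfunc):
    # Group items sharing the same key; dicts preserve insertion order in Python 3.7+,
    # so both the groups and the items within them keep first-seen order.
    groups = {}
    for item in items:
        groups.setdefault(keyfunc(item), []).append(item)
    return list(groups.values())
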
def get_dupe_groups(self, files, ignore_list=None, j=job.nulljob):
    for f in (f for f in files if not hasattr(f, "is_ref")):
        f.is_ref = False
    files = remove_dupe_paths(files)
    logging.info("Getting matches. Scan type: %d", self.scan_type)
    matches = self._getmatches(files, j)
    logging.info("Found %d matches" % len(matches))
    j.set_progress(100, tr("Almost done! Fiddling with results..."))
    # In removing what we call here "false matches", we want to: if we scan by folders,
    # remove folder matches for which the parent is also in a match (they're "duplicated
    # duplicates", if you will); drop mixed file kinds unless the option is enabled; keep
    # only matches for which both files exist; and, lastly, drop matches with both files
    # as ref.
    if self.scan_type == ScanType.FOLDERS and matches:
        allpath = {m.first.path for m in matches}
        allpath |= {m.second.path for m in matches}
        sortedpaths = sorted(allpath)
        toremove = set()
        last_parent_path = sortedpaths[0]
        for p in sortedpaths[1:]:
            if p in last_parent_path:
                toremove.add(p)
            else:
                last_parent_path = p
        matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove]
    if not self.mix_file_kind:
        matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
    matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
    matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
    if ignore_list:
        matches = [m for m in matches if not ignore_list.are_ignored(str(m.first.path), str(m.second.path))]
    logging.info("Grouping matches")
    groups = engine.get_groups(matches)
    if self.scan_type in {
        ScanType.FILENAME,
        ScanType.FIELDS,
        ScanType.FIELDSNOORDER,
        ScanType.TAG,
    }:
        matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
        self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
    else:
        # Ticket #195
        # To speed up the scan, we don't bother comparing contents of files that are both
        # ref files. However, this messes up "discarded" counting because there's a missing
        # match in cases where we end up with a dupe group anyway (with a non-ref file).
        # Because it's impossible to have discarded matches in exact dupe scans, we simply
        # set it to 0, thus bypassing our tricky problem.
        # Also, although ScanType.FuzzyBlock doesn't always do exact comparisons, we bypass
        # ref comparison for it too, which also messes up our "discarded" count. So we're
        # effectively disabling the "discarded" feature in PE, but it's better than falsely
        # reporting discarded matches.
        self.discarded_file_count = 0
    groups = [g for g in groups if any(not f.is_ref for f in g)]
    logging.info("Created %d groups" % len(groups))
    for g in groups:
        g.prioritize(self._key_func, self._tie_breaker)
    return groups

def _getData(self):
    folder = self.ref
    parent_volumes = dedupe(song.original.parent_volume for song in folder.iterallfiles())
    return [
        folder.name,
        ','.join(l.name for l in parent_volumes),
        folder.get_stat('filecount'),
        format_size(folder.get_stat('size'), 2, 2, False),
        format_time(folder.get_stat('duration')),
    ]

def _invert_amounts(self, apply_to_all):
    if apply_to_all:
        panes = self.panes
    else:
        panes = [self.selected_pane]
    entries = flatten(p.account.entries for p in panes)
    txns = dedupe(e.transaction for e in entries)
    for txn in txns:
        for split in txn.splits:
            split.amount = -split.amount
    # Entries hold a copy of their split's amount (I don't remember why), so it has to be
    # updated as well.
    for entry in entries:
        entry.amount = entry.split.amount
    self.import_table.refresh()

def show_transfer_account(self, row_index=None):
    if row_index is None:
        if not self.selected_entries:
            return
        row_index = self.selected_index
    entry = self[row_index].entry
    splits = entry.transaction.splits
    accounts = dedupe(split.account for split in splits if split.account is not None)
    if len(accounts) < 2:
        return  # no transfer
    index = accounts.index(entry.account)
    if index < len(accounts) - 1:
        account_to_show = accounts[index + 1]
    else:
        account_to_show = accounts[0]
    self.mainwindow.open_account(account_to_show)

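# The if/else above is just a wrap-around: calling show_transfer_account() repeatedly
# cycles through every account of the transaction. Writing it as
# accounts[(index + 1) % len(accounts)] would be equivalent.
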
def _refresh_candidates(self):
    if self.mainwindow is None or not self.attrname:
        return
    doc = self.mainwindow.document
    attrname = self.attrname
    if attrname == 'description':
        self._candidates = doc.transactions.descriptions
    elif attrname == 'payee':
        self._candidates = doc.transactions.payees
    elif attrname in {'from', 'to', 'account', 'transfer'}:
        result = doc.transactions.account_names
        # `result` doesn't contain empty accounts' names, so we add them.
        result += [a.name for a in doc.accounts if not a.inactive]
        if attrname == 'transfer' and self.account is not None:
            result = [name for name in result if name != self.account.name]
        self._candidates = result
    self._candidates = dedupe([name for name in self._candidates if name.strip()])

def __init__(self, partial, candidates):
    """Build a completion list.

    'partial' is the partial value to be completed. 'candidates' is the list of
    candidate values to be tried, the most likely candidate first.
    """
    if not partial:
        self._completions = None
        return
    partial = sort_string(partial)
    candidates = dedupe(c.strip() for c in candidates)
    self._completions = []
    for candidate in candidates:
        normalized = sort_string(candidate)
        if normalized.startswith(partial):
            self._completions.append(candidate)
    self._completions.reverse()
    if self._completions:
        self._index = len(self._completions) - 1

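# Usage sketch (the CompletionList class name and the candidates are hypothetical;
# sort_string presumably normalizes strings, e.g. lowercasing, before comparison):
#
#     comp = CompletionList('che', ['Checking', 'Savings', 'Chess Club'])
#     # prefix matches, reversed so the most likely candidate comes last:
#     # _completions == ['Chess Club', 'Checking'], _index points at 'Checking'
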
def GetNodeData(self, node):
    if node.is_container:
        img_name = 'folder_conflict_16' if node.allconflicts else 'folder_16'
        parent_volumes = dedupe(song.original.parent_volume for song in node.iterallfiles())
        return [
            node.name,
            ','.join(l.name for l in parent_volumes),
            node.get_stat('filecount'),
            format_size(node.get_stat('size'), 2, 2, False),
            format_time(node.get_stat('duration')),
            img_name,
        ]
    else:
        img_name = 'song_conflict_16' if is_conflicted(node.name) else 'song_16'
        return [
            node.name,
            node.original.parent_volume.name,
            0,
            format_size(node.size, 2, 2, False),
            format_time(node.duration, with_hours=False),
            img_name,
        ]

def _get_intersections(self, reorder_line):
    # Return a list of elements that intersect with the line, in order. The order depends
    # on the distance of the elem's first intersection from the line's origin.
    intersections = []
    for elem in self._active_elems():
        rect = self._elem2drawrect[elem]
        for line in rect.lines():
            inter = reorder_line.intersection_point(line)
            if inter is not None:
                dist = inter.distance_to(reorder_line.p1)
                intersections.append((dist, elem))
    if not intersections:
        # We cross no line, but we might be in the middle of an elem, in which case we
        # should return that elem. It's even possible that we're inside multiple rects.
        # This case is tricky (there are multiple possibilities here). We just choose the
        # elem whose center is closest to our origin.
        origin = reorder_line.p1
        for elem in self._active_elems():
            rect = self._elem2drawrect[elem]
            if rect.contains_point(origin):
                intersections.append((origin.distance_to(rect.center()), elem))
    intersections.sort(key=lambda tup: tup[0])
    return dedupe([elem for dist, elem in intersections])

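# Note that sorting by distance *before* deduping is what makes this work: an elem whose
# rect is crossed twice appears at two distances, and the order-preserving dedupe keeps
# only its nearest crossing, e.g. [(3.0, b), (5.0, a), (9.0, b)] -> [b, a].
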
def criteria_list(self):
    dupes = flatten(g[:] for g in self.results.groups)
    values = sorted(dedupe(self.extract_value(d) for d in dupes))
    return [Criterion(self, value) for value in values]

def _insertItem(self, item):
    self._items = dedupe([item] + self._items)[:self._maxItemCount]

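# This one-liner is the classic "move to front" recent-items trick: prepending the item
# and deduping keeps the new (first) occurrence and drops any older one, then the slice
# enforces the cap. For example, with _maxItemCount = 3:
#
#     _items == ['b', 'a', 'c']; _insertItem('a')  ->  ['a', 'b', 'c']
#     _items == ['a', 'b', 'c']; _insertItem('d')  ->  ['d', 'a', 'b']
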
def _swap_fields(self, panes, switch_func):
    entries = flatten(p.account.entries for p in panes)
    txns = dedupe(e.transaction for e in entries)
    for txn in txns:
        switch_func(txn)
    self.import_table.refresh()

def delete_entries(self, entries):
    from_account = first(entries).account
    transactions = dedupe(e.transaction for e in entries)
    self.delete_transactions(transactions, from_account=from_account)