def make_netx_graph_from_aid_groups(ibs, aids_list, only_reviewed_matches=True,
                                    invis_edges=None, ensure_edges=None,
                                    temp_nids=None, allow_directed=False):
    r"""
    Build a networkx graph of annotations grouped by name.

    Nodes are annotation ids (aids); edges connect annotations within the
    same group. Node 'nid' attributes and edge colors are assigned so the
    graph can be rendered with name-based coloring.

    Args:
        ibs (ibeis.IBEISController): image analysis api
        aids_list (list): list of groups of annotation ids
        only_reviewed_matches (bool): if True, keep only pairs that have an
            existing annotmatch row (i.e. reviewed matches)
        invis_edges (list): edges to draw invisibly (added if missing)
        ensure_edges (list or 'all'): edges that must exist in the graph;
            'all' forces a full clique over current nodes
        temp_nids: if given, all nodes get a placeholder nid of 1 (HACK)
        allow_directed (bool): if False, a reversed existing edge satisfies
            an ensure_edges request

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.viz.viz_graph import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> aids_list = [[1, 2, 3, 4], [5, 6, 7]]
        >>> invis_edges = [(1, 5)]
        >>> only_reviewed_matches = True
        >>> graph = make_netx_graph_from_aid_groups(ibs, aids_list,
        >>>                                         only_reviewed_matches,
        >>>                                         invis_edges)
        >>> list(nx.connected_components(graph.to_undirected()))
    """
    #aids_list, nid_list = ibs.group_annots_by_name(aid_list)
    unique_aids = list(ut.flatten(aids_list))
    # grouped version: all ordered within-group pairs, excluding self-pairs
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)
    if only_reviewed_matches:
        # Keep only pairs with a recorded annotmatch row; rowids of None
        # mean no review exists for that pair.
        annotmatch_rowids = ibs.get_annotmatch_rowid_from_superkey(
            aids1, aids2)
        annotmatch_rowids = ut.filter_Nones(annotmatch_rowids)
        aids1 = ibs.get_annotmatch_aid1(annotmatch_rowids)
        aids2 = ibs.get_annotmatch_aid2(annotmatch_rowids)
    graph = make_netx_graph_from_aidpairs(ibs, aids1, aids2,
                                          unique_aids=unique_aids)
    if ensure_edges is not None:
        if ensure_edges == 'all':
            ensure_edges = list(ut.upper_diag_self_prodx(list(graph.nodes())))
        # ensure_edges_ records the edge orientation actually present in the
        # graph (requested, reversed, or newly added) for later coloring.
        ensure_edges_ = []
        for edge in ensure_edges:
            edge = tuple(edge)
            redge = tuple(edge[::-1])  # HACK
            if graph.has_edge(*edge):
                ensure_edges_.append(edge)
                pass
                #nx.set_edge_attributes(graph, 'weight', {edge: .001})
            elif (not allow_directed) and graph.has_edge(*redge):
                # Reversed edge already exists; treat it as satisfying the
                # request when directionality does not matter.
                ensure_edges_.append(redge)
                #nx.set_edge_attributes(graph, 'weight', {redge: .001})
                pass
            else:
                ensure_edges_.append(edge)
                #graph.add_edge(*edge, weight=.001)
                graph.add_edge(*edge)
    if temp_nids is None:
        unique_nids = ibs.get_annot_nids(list(graph.nodes()))
    else:
        # HACK: every node gets the same placeholder nid
        unique_nids = [1] * len(list(graph.nodes()))
        #unique_nids = temp_nids
    # NOTE(review): (graph, name, values) argument order is the networkx 1.x
    # set_node_attributes signature — confirm pinned networkx version.
    nx.set_node_attributes(graph, 'nid', dict(zip(graph.nodes(), unique_nids)))
    import plottool as pt
    ensure_names_are_connected(graph, aids_list)
    # Color edges by nid
    color_by_nids(graph, unique_nids=unique_nids)
    if invis_edges:
        for edge in invis_edges:
            if graph.has_edge(*edge):
                nx.set_edge_attributes(graph, 'style', {edge: 'invis'})
                nx.set_edge_attributes(graph, 'invisible', {edge: True})
            else:
                graph.add_edge(*edge, style='invis', invisible=True)
    # Hack color images orange
    if ensure_edges:
        nx.set_edge_attributes(
            graph, 'color', {tuple(edge): pt.ORANGE for edge in ensure_edges_})
    return graph
def merge_viewpoint_graph():
    r"""
    Draw a demonstration graph of one animal's annotations grouped by
    viewpoint (left / back / right) connected across viewpoints.

    CommandLine:
        python -m ibeis.scripts.specialdraw merge_viewpoint_graph --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.specialdraw import *  # NOQA
        >>> result = merge_viewpoint_graph()
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import plottool as pt
    import ibeis
    import networkx as nx
    defaultdb = 'PZ_Master1'
    ibs = ibeis.opendb(defaultdb=defaultdb)
    #nids = None
    # Annotations for a single hard-coded name, split by viewpoint.
    aids = ibs.get_name_aids(4875)
    ibs.print_annot_stats(aids)
    left_aids = ibs.filter_annots_general(aids, view='left')[0:3]
    right_aids = ibs.filter_annots_general(aids, view='right')
    # 14517 is excluded by hand — presumably a bad annotation; verify.
    right_aids = list(set(right_aids) - {14517})[0:3]
    back = ibs.filter_annots_general(aids, view='back')[0:4]
    backleft = ibs.filter_annots_general(aids, view='backleft')[0:4]
    backright = ibs.filter_annots_general(aids, view='backright')[0:4]
    # Per-viewpoint cliques as directed graphs.
    right_graph = nx.DiGraph(ut.upper_diag_self_prodx(right_aids))
    left_graph = nx.DiGraph(ut.upper_diag_self_prodx(left_aids))
    back_edges = [
        tuple([back[0], backright[0]][::1]),
        tuple([back[0], backleft[0]][::1]),
    ]
    back_graph = nx.DiGraph(back_edges)
    # Let the graph be a bit smaller
    right_graph.edge[right_aids[1]][
        right_aids[2]]['constraint'] = ut.get_argflag('--constraint')
    left_graph.edge[left_aids[1]][left_aids[2]]['constraint'] = ut.get_argflag(
        '--constraint')
    #right_graph = right_graph.to_undirected().to_directed()
    #left_graph = left_graph.to_undirected().to_directed()
    # NOTE(review): (graph, name, value) is the networkx 1.x
    # set_node_attributes signature — confirm pinned networkx version.
    nx.set_node_attributes(right_graph, 'groupid', 'right')
    nx.set_node_attributes(left_graph, 'groupid', 'left')
    #nx.set_node_attributes(right_graph, 'scale', .2)
    #nx.set_node_attributes(left_graph, 'scale', .2)
    #back_graph.node[back[0]]['scale'] = 2.3
    nx.set_node_attributes(back_graph, 'groupid', 'back')
    # Merge the three viewpoint graphs and bridge back->left and back->right.
    view_graph = nx.compose_all([left_graph, back_graph, right_graph])
    view_graph.add_edges_from([
        [backright[0], right_aids[0]][::-1],
        [backleft[0], left_aids[0]][::-1],
    ])
    pt.ensure_pylab_qt4()
    graph = graph = view_graph  # NOQA
    #graph = graph.to_undirected()
    nx.set_edge_attributes(graph, 'color', pt.DARK_ORANGE[0:3])
    #nx.set_edge_attributes(graph, 'color', pt.BLACK)
    # Back-view edges are highlighted in a different color.
    nx.set_edge_attributes(graph, 'color',
                           {edge: pt.LIGHT_BLUE[0:3] for edge in back_edges})
    #pt.close_all_figures();
    from ibeis.viz import viz_graph
    layoutkw = {
        'nodesep': 1,
    }
    viz_graph.viz_netx_chipgraph(ibs, graph, with_images=1, prog='dot',
                                 augment_graph=False, layoutkw=layoutkw)
    # Dead scratch code kept as a string literal for reference; never runs.
    if False:
        """
        #view_graph = left_graph
        pt.close_all_figures(); viz_netx_chipgraph(ibs, view_graph, with_images=0, prog='neato')
        #viz_netx_chipgraph(ibs, view_graph, layout='pydot', with_images=False)
        #back_graph = make_name_graph_interaction(ibs, aids=back, with_all=False)
        aids = left_aids + back + backleft + backright + right_aids
        for aid, chip in zip(aids, ibs.get_annot_chips(aids)):
            fpath = ut.truepath('~/slides/merge/aid_%d.jpg' % (aid,))
            vt.imwrite(fpath, vt.resize_to_maxdims(chip, (400, 400)))
        ut.copy_files_to(, )
        aids = ibs.filterannots_by_tags(ibs.get_valid_aids(),
            dict(has_any_annotmatch='splitcase'))
        aid1 = ibs.group_annots_by_name_dict(aids)[252]
        aid2 = ibs.group_annots_by_name_dict(aids)[6791]
        aids1 = ibs.get_annot_groundtruth(aid1)[0][0:4]
        aids2 = ibs.get_annot_groundtruth(aid2)[0]
        make_name_graph_interaction(ibs, aids=aids1 + aids2, with_all=False)
        ut.ensuredir(ut.truthpath('~/slides/split/))
        for aid, chip in zip(aids, ibs.get_annot_chips(aids)):
            fpath = ut.truepath('~/slides/merge/aidA_%d.jpg' % (aid,))
            vt.imwrite(fpath, vt.resize_to_maxdims(chip, (400, 400)))
        """
        pass
def add_clique(graph, nodes, edgeattrs=None, nodeattrs=None):
    """Add a clique (all pairwise edges) over `nodes` to `graph`.

    Args:
        graph: a networkx graph; mutated in place via ``add_edges_from``.
        nodes: sequence of node ids to connect pairwise.
        edgeattrs (dict): attributes applied to every added edge.
        nodeattrs (dict): accepted for API compatibility but currently
            unused — node attributes are NOT applied.

    Returns:
        list: the upper-triangular list of edges that were added.
    """
    # None-sentinel defaults instead of mutable `{}` defaults (the originals
    # were shared across calls, a classic Python pitfall).
    if edgeattrs is None:
        edgeattrs = {}
    edge_list = ut.upper_diag_self_prodx(nodes)
    graph.add_edges_from(edge_list, **edgeattrs)
    return edge_list
def fix_duplicates(drive):
    r"""
    for every duplicate file passing a (eg avi) filter, remove the file
    that is in the smallest directory. On a tie use the smallest dpath.
    This will filter all duplicate files in a folder into a single folder.

    but... need to look at non-duplicates in that folder and decide if they
    should be moved as well. So, should trigger on folders that have at
    least 50% duplicate. Might not want to move curated folders.

    Example:
        cd ~/local/scripts
        >>> from register_files import *  # NOQA
        >>> dpaths = ut.get_argval('--drives', type_=list, default=['E:/'])#'D:/', 'E:/', 'F:/'])
        >>> drives = [Drive(root_dpath) for root_dpath in dpaths]
        >>> E = drive = drives[0]
        >>> #D, E, F = drives
    """
    print('Fixing Duplicates in %r' % (drive,))
    # Group file indices by content hash; hashes seen more than once are
    # candidate duplicates.
    list_ = drive.fpath_hashX_list
    multiindex_dict_ = build_multindex(list_)
    duplicate_hashes = [
        key for key, val in six.iteritems(multiindex_dict_)
        if len(val) > 1
    ]
    duplicate_idxs = ut.dict_take(multiindex_dict_, duplicate_hashes)
    unflat_fpaths = ut.list_unflat_take(drive.fpath_list, duplicate_idxs)
    # Check if any dups have been removed
    still_exists = ut.unflat_map(exists, unflat_fpaths)
    unflat_idxs2 = ut.zipcompress(duplicate_idxs, still_exists)
    duplicate_idxs = [idxs for idxs in unflat_idxs2 if len(idxs) > 1]
    # Look at duplicate files
    unflat_fpaths = ut.list_unflat_take(drive.fpath_list, duplicate_idxs)
    unflat_sizes = ut.list_unflat_take(drive.fpath_bytes_list, duplicate_idxs)
    # Find highly coupled directories
    if True:
        # Count how often pairs of directories share duplicate files.
        coupled_dirs = []
        for fpaths in unflat_fpaths:
            #basedir = ut.longest_existing_path(commonprefix(fpaths))
            dirs = sorted(list(map(dirname, fpaths)))
            _list = list(range(len(dirs)))
            idxs = ut.upper_diag_self_prodx(_list)
            coupled_dirs.extend(
                list(map(tuple, ut.list_unflat_take(dirs, idxs))))
        hist_ = ut.dict_hist(coupled_dirs)
        coupled_idxs = ut.list_argsort(hist_.values())[::-1]
        most_coupled = ut.take(list(hist_.keys()), coupled_idxs[0:100])
        print('Coupled fpaths: ' + ut.list_str(most_coupled, nl=True))
    print('%d unique files are duplicated' % (len(unflat_sizes),))
    #print('Duplicate sizes: ' + ut.list_str(unflat_sizes[0:10], nl=True))
    #print('Duplicate fpaths: ' + ut.list_str(unflat_fpaths[0:10], nl=True))
    #print('Duplicate fpaths: ' + ut.list_str(unflat_fpaths[0::5], nl=True))
    print('Duplicate fpaths: ' + ut.list_str(unflat_fpaths, nl=True))
    # Find duplicate directories
    dpath_list = list(drive.dpath_to_fidx.keys())
    # NOTE(review): fidxs_list is taken in drive.dpath_list order, but the
    # didxs computed below index into dpath_list (dict-key order). If those
    # orders differ the final lookup is misaligned — confirm.
    fidxs_list = ut.dict_take(drive.dpath_to_fidx, drive.dpath_list)
    #exists_list = list(map(exists, drive.fpath_list))
    #unflat_exists = ut.list_unflat_take(exists_list, fidxs_list)
    fname_registry = [basename(fpath) for fpath in drive.fpath_list]
    unflat_fnames = ut.list_unflat_take(fname_registry, fidxs_list)

    def unsorted_list_hash(list_):
        # Order-insensitive hash of a list of filenames.
        return ut.hashstr27(str(sorted(list_)))

    unflat_fname_sets = list(map(unsorted_list_hash,
                                 ut.ProgIter(unflat_fnames, freq=10000)))
    # NOTE(review): this assignment is dead; it is overwritten below.
    fname_based_duplicate_dpaths = []
    multiindex_dict2_ = build_multindex(unflat_fname_sets)
    fname_based_duplicate_hashes = [key for key, val in multiindex_dict2_.items()
                                    if len(val) > 1]
    print('#fname_based_duplicate_dpaths = %r' % (
        len(fname_based_duplicate_hashes),))
    fname_based_duplicate_didxs = ut.dict_take(
        multiindex_dict2_, fname_based_duplicate_hashes)
    fname_based_duplicate_dpaths = ut.list_unflat_take(
        dpath_list, fname_based_duplicate_didxs)
    print(ut.repr3(fname_based_duplicate_dpaths[0:10]))
def fix_duplicates(drive):
    r"""
    for every duplicate file passing a (eg avi) filter, remove the file
    that is in the smallest directory. On a tie use the smallest dpath.
    This will filter all duplicate files in a folder into a single folder.

    but... need to look at non-duplicates in that folder and decide if they
    should be moved as well. So, should trigger on folders that have at
    least 50% duplicate. Might not want to move curated folders.

    Example:
        cd ~/local/scripts
        >>> from register_files import *  # NOQA
        >>> dpaths = ut.get_argval('--drives', type_=list, default=['E:/'])#'D:/', 'E:/', 'F:/'])
        >>> drives = [Drive(root_dpath) for root_dpath in dpaths]
        >>> E = drive = drives[0]
        >>> #D, E, F = drives
    """
    print('Fixing Duplicates in %r' % (drive, ))
    # Group file indices by content hash; hashes seen more than once are
    # candidate duplicates.
    list_ = drive.fpath_hashX_list
    multiindex_dict_ = build_multindex(list_)
    duplicate_hashes = [
        key for key, val in six.iteritems(multiindex_dict_)
        if len(val) > 1
    ]
    duplicate_idxs = ut.dict_take(multiindex_dict_, duplicate_hashes)
    unflat_fpaths = ut.list_unflat_take(drive.fpath_list, duplicate_idxs)
    # Check if any dups have been removed
    still_exists = ut.unflat_map(exists, unflat_fpaths)
    unflat_idxs2 = ut.zipcompress(duplicate_idxs, still_exists)
    duplicate_idxs = [idxs for idxs in unflat_idxs2 if len(idxs) > 1]
    # Look at duplicate files
    unflat_fpaths = ut.list_unflat_take(drive.fpath_list, duplicate_idxs)
    unflat_sizes = ut.list_unflat_take(drive.fpath_bytes_list, duplicate_idxs)
    # Find highly coupled directories
    if True:
        # Count how often pairs of directories share duplicate files.
        coupled_dirs = []
        for fpaths in unflat_fpaths:
            #basedir = ut.longest_existing_path(commonprefix(fpaths))
            dirs = sorted(list(map(dirname, fpaths)))
            _list = list(range(len(dirs)))
            idxs = ut.upper_diag_self_prodx(_list)
            coupled_dirs.extend(
                list(map(tuple, ut.list_unflat_take(dirs, idxs))))
        hist_ = ut.dict_hist(coupled_dirs)
        coupled_idxs = ut.list_argsort(hist_.values())[::-1]
        most_coupled = ut.take(list(hist_.keys()), coupled_idxs[0:100])
        print('Coupled fpaths: ' + ut.repr2(most_coupled, nl=True))
    print('%d unique files are duplicated' % (len(unflat_sizes), ))
    #print('Duplicate sizes: ' + ut.repr2(unflat_sizes[0:10], nl=True))
    #print('Duplicate fpaths: ' + ut.repr2(unflat_fpaths[0:10], nl=True))
    #print('Duplicate fpaths: ' + ut.repr2(unflat_fpaths[0::5], nl=True))
    print('Duplicate fpaths: ' + ut.repr2(unflat_fpaths, nl=True))
    # Find duplicate directories
    dpath_list = list(drive.dpath_to_fidx.keys())
    # FIX: take fidxs in dpath_list order (previously drive.dpath_list was
    # used here while the duplicate indices computed below index into
    # dpath_list; if the two orders differ the final dpath lookup was
    # misaligned). Using dpath_list keeps both sides aligned.
    fidxs_list = ut.dict_take(drive.dpath_to_fidx, dpath_list)
    #exists_list = list(map(exists, drive.fpath_list))
    #unflat_exists = ut.list_unflat_take(exists_list, fidxs_list)
    fname_registry = [basename(fpath) for fpath in drive.fpath_list]
    unflat_fnames = ut.list_unflat_take(fname_registry, fidxs_list)

    def unsorted_list_hash(list_):
        # Order-insensitive hash of a list of filenames.
        return ut.hashstr27(str(sorted(list_)))

    unflat_fname_sets = list(
        map(unsorted_list_hash, ut.ProgIter(unflat_fnames, freq=10000)))
    # (removed a dead `fname_based_duplicate_dpaths = []` assignment that was
    # immediately overwritten below)
    multiindex_dict2_ = build_multindex(unflat_fname_sets)
    fname_based_duplicate_hashes = [
        key for key, val in multiindex_dict2_.items()
        if len(val) > 1
    ]
    print('#fname_based_duplicate_dpaths = %r' % (
        len(fname_based_duplicate_hashes), ))
    fname_based_duplicate_didxs = ut.dict_take(
        multiindex_dict2_, fname_based_duplicate_hashes)
    fname_based_duplicate_dpaths = ut.list_unflat_take(
        dpath_list, fname_based_duplicate_didxs)
    print(ut.repr3(fname_based_duplicate_dpaths[0:10]))
def make_netx_graph_from_aid_groups(ibs, aids_list, only_reviewed_matches=True, invis_edges=None, ensure_edges=None, temp_nids=None, allow_directed=False): r""" Args: ibs (ibeis.IBEISController): image analysis api aids_list (list): Example: >>> # DISABLE_DOCTEST >>> from ibeis.viz.viz_graph import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb(defaultdb='testdb1') >>> aids_list = [[1, 2, 3, 4], [5, 6, 7]] >>> invis_edges = [(1, 5)] >>> only_reviewed_matches = True >>> graph = make_netx_graph_from_aid_groups(ibs, aids_list, >>> only_reviewed_matches, >>> invis_edges) >>> list(nx.connected_components(graph.to_undirected())) """ #aids_list, nid_list = ibs.group_annots_by_name(aid_list) unique_aids = list(ut.flatten(aids_list)) # grouped version unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list) aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]] aids1 = ut.get_list_column(aid_pairs, 0) aids2 = ut.get_list_column(aid_pairs, 1) if only_reviewed_matches: annotmatch_rowids = ibs.get_annotmatch_rowid_from_superkey(aids1, aids2) annotmatch_rowids = ut.filter_Nones(annotmatch_rowids) aids1 = ibs.get_annotmatch_aid1(annotmatch_rowids) aids2 = ibs.get_annotmatch_aid2(annotmatch_rowids) graph = make_netx_graph_from_aidpairs(ibs, aids1, aids2, unique_aids=unique_aids) if ensure_edges is not None: if ensure_edges == 'all': ensure_edges = list(ut.upper_diag_self_prodx(list(graph.nodes()))) ensure_edges_ = [] for edge in ensure_edges: edge = tuple(edge) redge = tuple(edge[::-1]) # HACK if graph.has_edge(*edge): ensure_edges_.append(edge) pass #nx.set_edge_attributes(graph, 'weight', {edge: .001}) elif (not allow_directed) and graph.has_edge(*redge): ensure_edges_.append(redge) #nx.set_edge_attributes(graph, 'weight', {redge: .001}) pass else: ensure_edges_.append(edge) #graph.add_edge(*edge, weight=.001) graph.add_edge(*edge) if temp_nids is None: unique_nids = ibs.get_annot_nids(list(graph.nodes())) else: # HACK unique_nids = [1] * 
len(list(graph.nodes())) #unique_nids = temp_nids nx.set_node_attributes(graph, 'nid', dict(zip(graph.nodes(), unique_nids))) import plottool as pt ensure_names_are_connected(graph, aids_list) # Color edges by nid color_by_nids(graph, unique_nids=unique_nids) if invis_edges: for edge in invis_edges: if graph.has_edge(*edge): nx.set_edge_attributes(graph, 'style', {edge: 'invis'}) nx.set_edge_attributes(graph, 'invisible', {edge: True}) else: graph.add_edge(*edge, style='invis', invisible=True) # Hack color images orange if ensure_edges: nx.set_edge_attributes(graph, 'color', {tuple(edge): pt.ORANGE for edge in ensure_edges_}) return graph
def merge_viewpoint_graph():
    r"""
    Draw a demonstration graph of one animal's annotations grouped by
    viewpoint (left / back / right) connected across viewpoints.

    CommandLine:
        python -m ibeis.scripts.specialdraw merge_viewpoint_graph --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.specialdraw import *  # NOQA
        >>> result = merge_viewpoint_graph()
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import plottool as pt
    import ibeis
    import networkx as nx
    defaultdb = 'PZ_Master1'
    ibs = ibeis.opendb(defaultdb=defaultdb)
    #nids = None
    # Annotations for a single hard-coded name, split by viewpoint.
    aids = ibs.get_name_aids(4875)
    ibs.print_annot_stats(aids)
    left_aids = ibs.filter_annots_general(aids, view='left')[0:3]
    right_aids = ibs.filter_annots_general(aids, view='right')
    # 14517 is excluded by hand — presumably a bad annotation; verify.
    right_aids = list(set(right_aids) - {14517})[0:3]
    back = ibs.filter_annots_general(aids, view='back')[0:4]
    backleft = ibs.filter_annots_general(aids, view='backleft')[0:4]
    backright = ibs.filter_annots_general(aids, view='backright')[0:4]
    # Per-viewpoint cliques as directed graphs.
    right_graph = nx.DiGraph(ut.upper_diag_self_prodx(right_aids))
    left_graph = nx.DiGraph(ut.upper_diag_self_prodx(left_aids))
    back_edges = [
        tuple([back[0], backright[0]][::1]),
        tuple([back[0], backleft[0]][::1]),
    ]
    back_graph = nx.DiGraph(back_edges)
    # Let the graph be a bit smaller
    right_graph.edge[right_aids[1]][right_aids[2]]['constraint'] = ut.get_argflag('--constraint')
    left_graph.edge[left_aids[1]][left_aids[2]]['constraint'] = ut.get_argflag('--constraint')
    #right_graph = right_graph.to_undirected().to_directed()
    #left_graph = left_graph.to_undirected().to_directed()
    # NOTE(review): (graph, name, value) is the networkx 1.x
    # set_node_attributes signature — confirm pinned networkx version.
    nx.set_node_attributes(right_graph, 'groupid', 'right')
    nx.set_node_attributes(left_graph, 'groupid', 'left')
    #nx.set_node_attributes(right_graph, 'scale', .2)
    #nx.set_node_attributes(left_graph, 'scale', .2)
    #back_graph.node[back[0]]['scale'] = 2.3
    nx.set_node_attributes(back_graph, 'groupid', 'back')
    # Merge the three viewpoint graphs and bridge back->left and back->right.
    view_graph = nx.compose_all([left_graph, back_graph, right_graph])
    view_graph.add_edges_from([
        [backright[0], right_aids[0]][::-1],
        [backleft[0], left_aids[0]][::-1],
    ])
    pt.ensure_pylab_qt4()
    graph = graph = view_graph  # NOQA
    #graph = graph.to_undirected()
    nx.set_edge_attributes(graph, 'color', pt.DARK_ORANGE[0:3])
    #nx.set_edge_attributes(graph, 'color', pt.BLACK)
    # Back-view edges are highlighted in a different color.
    nx.set_edge_attributes(graph, 'color',
                           {edge: pt.LIGHT_BLUE[0:3] for edge in back_edges})
    #pt.close_all_figures();
    from ibeis.viz import viz_graph
    layoutkw = {
        'nodesep': 1,
    }
    viz_graph.viz_netx_chipgraph(ibs, graph, with_images=1, prog='dot',
                                 augment_graph=False, layoutkw=layoutkw)
    # Dead scratch code kept as a string literal for reference; never runs.
    if False:
        """
        #view_graph = left_graph
        pt.close_all_figures(); viz_netx_chipgraph(ibs, view_graph, with_images=0, prog='neato')
        #viz_netx_chipgraph(ibs, view_graph, layout='pydot', with_images=False)
        #back_graph = make_name_graph_interaction(ibs, aids=back, with_all=False)
        aids = left_aids + back + backleft + backright + right_aids
        for aid, chip in zip(aids, ibs.get_annot_chips(aids)):
            fpath = ut.truepath('~/slides/merge/aid_%d.jpg' % (aid,))
            vt.imwrite(fpath, vt.resize_to_maxdims(chip, (400, 400)))
        ut.copy_files_to(, )
        aids = ibs.filterannots_by_tags(ibs.get_valid_aids(),
            dict(has_any_annotmatch='splitcase'))
        aid1 = ibs.group_annots_by_name_dict(aids)[252]
        aid2 = ibs.group_annots_by_name_dict(aids)[6791]
        aids1 = ibs.get_annot_groundtruth(aid1)[0][0:4]
        aids2 = ibs.get_annot_groundtruth(aid2)[0]
        make_name_graph_interaction(ibs, aids=aids1 + aids2, with_all=False)
        ut.ensuredir(ut.truthpath('~/slides/split/))
        for aid, chip in zip(aids, ibs.get_annot_chips(aids)):
            fpath = ut.truepath('~/slides/merge/aidA_%d.jpg' % (aid,))
            vt.imwrite(fpath, vt.resize_to_maxdims(chip, (400, 400)))
        """
        pass