Beispiel #1
0
 def get_relations(self, role_names=None):
     """Search for the relations that involve this entity.

     Delegates to ``SqliteRelation.search`` with this object's ``id`` and a
     fixed ``entity_type`` of 1 (presumably the code for artists -- confirm
     against ``SqliteRelation``).

     ``role_names`` is forwarded unchanged; the default ``None`` is passed
     through as-is.
     """
     # Imported at call time rather than at module level, as in the original
     # (presumably to sidestep a circular import -- TODO confirm).
     from discograph.library.sqlite.SqliteRelation import SqliteRelation
     search_arguments = {
         'entity_id': self.id,
         'entity_type': 1,
         'role_names': role_names,
     }
     return SqliteRelation.search(**search_arguments)
Beispiel #2
0
 def query_relations(self, entity_keys, role_names=None, year=None, verbose=True):
     """Fetch relations for ``entity_keys``, batching to respect a variable budget.

     The entity keys are split into slices and each slice is passed to
     ``SqliteRelation.search_multi``; the results of all slices are
     concatenated.

     The slice size starts from a budget of 999 (presumably SQLite's default
     limit on bound variables per statement), minus the slots reserved for the
     role names and the year filter, halved because each entity key is
     presumably bound as two values (type and id).

     Args:
         entity_keys: Sliceable sequence of entity keys to look up.
         role_names: Optional collection of role names to filter on. ``None``
             is treated as "no role names" when sizing the batches and is
             forwarded unchanged to ``search_multi``.
         year: Optional year filter. An ``int`` reserves 2 variable slots, any
             other truthy value reserves 4.
         verbose: Forwarded to ``search_multi``.

     Returns:
         A list with the relations found for every slice, in slice order.
     """
     print("        Roles:", role_names)
     # ``role_names`` defaults to None, and len(None) raises TypeError, so the
     # default call used to crash before issuing a single query.
     role_count = len(role_names) if role_names is not None else 0
     entity_query_cap = 999
     entity_query_cap -= (1 + role_count) * 2
     if isinstance(year, int):
         entity_query_cap -= 2
     elif year:
         entity_query_cap -= 4
     entity_query_cap //= 2
     relations = []
     for start in range(0, len(entity_keys), entity_query_cap):
         entity_key_slice = entity_keys[start:start + entity_query_cap]
         found = SqliteRelation.search_multi(
             entity_key_slice,
             role_names=role_names,
             verbose=verbose,
             year=year,
             )
         relations.extend(found)
     return relations
Beispiel #3
0
    def collect_entities_2(self):
        """Collect the relation graph surrounding ``self.center_entity``.

        Starting from the center entity, relations are fetched breadth-first
        for up to ``self.degree`` rounds via ``SqliteRelation.search_multi``.
        Every entity seen becomes a node and every relation whose role is in
        ``self.role_names`` becomes a link.  Afterwards node names are looked
        up, and nodes/links are pruned: nameless nodes, nodes discovered in
        the last round but never expanded, and anything beyond
        ``self.max_nodes`` / ``self.max_links``.

        Entity keys are ``(entity_type, entity_id)`` tuples, where type 1 is
        an artist and any other type is treated as a label.

        Returns:
            A ``(nodes, links)`` tuple of dicts.  ``nodes`` maps entity keys
            to the node dicts built by ``self.entity_key_to_node``; ``links``
            maps ``link['key']`` to the link dicts built by
            ``self.relation_to_link``.
        """
        original_role_names = self.role_names or ()
        # 'Alias' and 'Member Of' are always queried so the nodes' 'aliases'
        # and 'members' sets get filled in; links are still only created for
        # the roles that were actually requested.
        provisional_role_names = set(original_role_names)
        provisional_role_names.update(['Alias', 'Member Of'])
        provisional_role_names = sorted(provisional_role_names)

        if type(self.center_entity).__name__.endswith('Artist'):
            initial_key = (1, self.center_entity.discogs_id)
        else:
            initial_key = (2, self.center_entity.discogs_id)
        entity_keys_to_visit = {initial_key}

        links = {}
        nodes = {}

        # Budget of 999 variables per query (presumably SQLite's default
        # bound-variable limit), minus the slots reserved for role names,
        # halved because each entity key is presumably bound as two values.
        entity_query_cap = 999
        entity_query_cap -= (1 + len(provisional_role_names)) * 2
        entity_query_cap //= 2

        break_on_next_loop = False

        for distance in range(self.degree + 1):

            current_entity_keys_to_visit = list(entity_keys_to_visit)
            for key in current_entity_keys_to_visit:
                nodes.setdefault(key, self.entity_key_to_node(key, distance))

            if break_on_next_loop:
                break
            # Once the node budget is reached (beyond distance 1), expand one
            # more round and then stop.
            if (
                1 < distance and
                self.max_nodes and
                self.max_nodes <= len(nodes)
                ):
                break_on_next_loop = True

            entity_keys_to_visit.clear()
            relations = []
            range_stop = len(current_entity_keys_to_visit)
            for start in range(0, range_stop, entity_query_cap):
                # Split into multiple queries to avoid variable maximum.
                stop = start + entity_query_cap
                entity_key_slice = current_entity_keys_to_visit[start:stop]
                relations.extend(SqliteRelation.search_multi(
                    entity_key_slice,
                    role_names=provisional_role_names,
                    ))
            for relation in relations:
                e1k = (relation['entity_one_type'], relation['entity_one_id'])
                e2k = (relation['entity_two_type'], relation['entity_two_id'])
                # Entities not seen before are queued for the next round.
                if e1k not in nodes:
                    entity_keys_to_visit.add(e1k)
                    nodes[e1k] = self.entity_key_to_node(e1k, distance + 1)
                if e2k not in nodes:
                    entity_keys_to_visit.add(e2k)
                    nodes[e2k] = self.entity_key_to_node(e2k, distance + 1)
                if relation['role_name'] == 'Alias':
                    nodes[e1k]['aliases'].add(e2k[1])
                    nodes[e2k]['aliases'].add(e1k[1])
                elif relation['role_name'] in ('Member Of', 'Sublabel Of'):
                    nodes[e2k]['members'].add(e1k[1])
                # Roles that were only added provisionally never become links.
                if relation['role_name'] not in original_role_names:
                    continue
                link = self.relation_to_link(relation)
                link['distance'] = min(
                    nodes[e1k]['distance'],
                    nodes[e2k]['distance'],
                    )
                links[link['key']] = link
                nodes[e1k]['links'].add(link['key'])
                nodes[e2k]['links'].add(link['key'])

        # Query node names, at most 999 ids per query.
        artist_ids = []
        label_ids = []
        for entity_type, entity_id in nodes:
            if entity_type == 1:
                artist_ids.append(entity_id)
            else:
                label_ids.append(entity_id)
        artists = []
        for i in range(0, len(artist_ids), 999):
            query = (SqliteArtist
                .select()
                .where(SqliteArtist.id.in_(artist_ids[i:i + 999]))
                )
            artists.extend(query)
        labels = []
        # Iterate over the label ids themselves.  Using len(artist_ids) here
        # skipped every label chunk past the artist count, leaving those
        # labels nameless so they were pruned below.
        for i in range(0, len(label_ids), 999):
            query = (SqliteLabel
                .select()
                .where(SqliteLabel.id.in_(label_ids[i:i + 999]))
                )
            labels.extend(query)
        for artist in artists:
            nodes[(1, artist.id)]['name'] = artist.name
        for label in labels:
            nodes[(2, label.id)]['name'] = label.name

        # Prune nameless nodes.  Iterate over a snapshot because prune_node
        # receives ``nodes`` and presumably removes entries from it.
        for node in tuple(nodes.values()):
            if not node.get('name'):
                self.prune_node(node, nodes, links, update_missing_count=False)

        # Prune unvisited nodes and links: whatever is still queued was
        # discovered in the final round but never expanded.
        for key in entity_keys_to_visit:
            # NOTE(review): ``node`` is None when the key was already pruned
            # as nameless above; confirm prune_node tolerates None.
            node = nodes.get(key)
            self.prune_node(node, nodes, links)

        # Prune nodes beyond maximum, keeping the closest ones (ties broken
        # by id).
        if self.max_nodes:
            nodes_to_prune = sorted(nodes.values(),
                key=lambda x: (x['distance'], x['id']),
                )[self.max_nodes:]
            for node in nodes_to_prune:
                self.prune_node(node, nodes, links)

        # Prune links beyond maximum, in the order given by self.link_sorter.
        if self.max_links:
            links_to_prune = sorted(links.values(),
                key=self.link_sorter,
                )[self.max_links:]
            for link in links_to_prune:
                self.prune_link(link, nodes, links)

        return nodes, links