Esempio n. 1
0
    def _route(self, table_name, range_array, metadata=None, **kwargs):
        """Assign the files to search for ``table_name`` to the available servers.

        The table row is looked up through ``db.Session``, its candidate files
        are obtained from ``table.files_to_search(range_array)``, and each file
        is mapped to a node of a ``HashRing`` built from
        ``self.conn_mgr.conn_names``, using the file id as the ring key.

        Args:
            table_name: ``Tables.table_id`` of the table to route.
            range_array: Passed unchanged to ``files_to_search``.
            metadata: Forwarded to the exceptions raised by this method.
            **kwargs: Accepted but not used here.

        Returns:
            dict mapping each node returned by the ring to
            ``{'table_id': table_name, 'file_ids': [<file id as str>, ...]}``.

        Raises:
            exceptions.DBError: The table lookup raised a SQLAlchemy error.
            exceptions.TableNotFoundError: No row with this id exists whose
                state differs from ``Tables.TO_DELETE``.
        """
        # PXU TODO: Implement Thread-local Context
        # PXU TODO: Session life mgt
        try:
            try:
                table = db.Session.query(Tables).filter(
                    and_(Tables.table_id == table_name,
                         Tables.state != Tables.TO_DELETE)).first()
            except sqlalchemy_exc.SQLAlchemyError as e:
                raise exceptions.DBError(message=str(e),
                                         metadata=metadata) from e

            if not table:
                raise exceptions.TableNotFoundError(table_name,
                                                    metadata=metadata)
            files = table.files_to_search(range_array)
        finally:
            # Release the session on every path, including the error paths
            # above, which previously returned without removing it.
            db.remove_session()

        servers = self.conn_mgr.conn_names
        logger.info('Available servers: {}'.format(servers))

        ring = HashRing(servers)

        routing = {}
        for f in files:
            file_id = str(f.id)
            target_host = ring.get_node(file_id)
            entry = routing.get(target_host)
            if entry is None:
                # First file assigned to this node: create its bucket.
                entry = {'table_id': table_name, 'file_ids': []}
                routing[target_host] = entry
            entry['file_ids'].append(file_id)

        return routing
    def _route(self,
               table_name,
               range_array,
               partition_tags=None,
               metadata=None,
               **kwargs):
        """Assign the files to search for a table and its partitions to servers.

        Without ``partition_tags`` the query selects the table itself plus
        every row whose ``owner_table`` is ``table_name``. With
        ``partition_tags`` it selects only rows owned by ``table_name`` whose
        ``partition_tag`` is one of the given tags. Rows in the
        ``Tables.TO_DELETE`` state are always excluded. The files of every
        selected row are then mapped to a node of a ``HashRing`` built from
        ``self.readonly_topo.group_names``, using the file id as the ring key.

        Args:
            table_name: Table id to route.
            range_array: Passed unchanged to ``files_to_search`` of each row.
            partition_tags: Optional collection of partition tags to restrict
                the search to.
            metadata: Forwarded to the exceptions raised by this method.
            **kwargs: Accepted but not used here.

        Returns:
            dict of the form
            ``{node: {table_id: [<file id as str>, ...], ...}, ...}``.

        Raises:
            exceptions.DBError: The table lookup raised a SQLAlchemy error.
            exceptions.TableNotFoundError: The query returned no rows.
        """
        # PXU TODO: Implement Thread-local Context
        # PXU TODO: Session life mgt

        if not partition_tags:
            cond = and_(
                or_(Tables.table_id == table_name,
                    Tables.owner_table == table_name),
                Tables.state != Tables.TO_DELETE)
        else:
            cond = and_(Tables.state != Tables.TO_DELETE,
                        Tables.owner_table == table_name,
                        Tables.partition_tag.in_(partition_tags))

        try:
            try:
                tables = db.Session.query(Tables).filter(cond).all()
            except sqlalchemy_exc.SQLAlchemyError as e:
                raise exceptions.DBError(message=str(e),
                                         metadata=metadata) from e

            if not tables:
                raise exceptions.TableNotFoundError('{}:{}'.format(
                    table_name, partition_tags),
                                                    metadata=metadata)

            # One entry per selected table/partition row.
            total_files = [
                table.files_to_search(range_array) for table in tables
            ]
        finally:
            # Release the session on every path, including the error paths
            # above, which previously returned without removing it.
            db.remove_session()

        servers = self.readonly_topo.group_names
        logger.info('Available servers: {}'.format(list(servers)))

        ring = HashRing(servers)

        routing = {}
        for files in total_files:
            for f in files:
                file_id = str(f.id)
                target_host = ring.get_node(file_id)
                # Group by node first, then by the table the file belongs to.
                per_table = routing.setdefault(target_host, {})
                per_table.setdefault(f.table_id, []).append(file_id)

        return routing
    def _route(self, collection_name, range_array, partition_tags=None, metadata=None, **kwargs):
        """Assign the files of a collection (and its partitions) to servers.

        Without ``partition_tags`` every row that is the collection itself or
        is owned by it is selected. With ``partition_tags`` the owned rows are
        fetched and kept only when one of the tags matches their
        ``partition_tag`` via ``re.match``; the collection row itself is
        included when ``'_default'`` is among the tags. Rows in the
        ``Tables.TO_DELETE`` state are always excluded. Files of type RAW,
        TO_INDEX or INDEX belonging to the selected rows are then mapped to a
        node of a ``HashRing`` built from ``self.readonly_topo.group_names``,
        using the file id as the ring key.

        Args:
            collection_name: Collection id to route.
            range_array: Accepted but not used by this method.
            partition_tags: Optional collection of tag patterns (regular
                expressions) selecting partitions.
            metadata: Forwarded to the exceptions raised by this method.
            **kwargs: Accepted but not used here.

        Returns:
            dict mapping each node to a list of file ids (as ``str``). An
            empty dict is returned when no file matches.

        Raises:
            exceptions.DBError: A metadata query raised a SQLAlchemy error.
            exceptions.CollectionNotFoundError: The collection query returned
                no rows.
        """
        # PXU TODO: Implement Thread-local Context
        # PXU TODO: Session life mgt

        if not partition_tags:
            cond = and_(
                or_(Tables.table_id == collection_name, Tables.owner_table == collection_name),
                Tables.state != Tables.TO_DELETE)
        else:
            # TODO: collection default partition is '_default'
            # Tags are not filtered in SQL; they are matched with re.match
            # further down.
            cond = and_(Tables.state != Tables.TO_DELETE,
                        Tables.owner_table == collection_name)
            if '_default' in partition_tags:
                default_par_cond = and_(Tables.table_id == collection_name, Tables.state != Tables.TO_DELETE)
                cond = or_(cond, default_par_cond)

        try:
            try:
                collections = db.Session.query(Tables).filter(cond).all()
            except sqlalchemy_exc.SQLAlchemyError as e:
                raise exceptions.DBError(message=str(e), metadata=metadata) from e

            if not collections:
                logger.error("Cannot find collection {} / {} in metadata".format(collection_name, partition_tags))
                raise exceptions.CollectionNotFoundError('{}:{}'.format(collection_name, partition_tags), metadata=metadata)

            if not partition_tags:
                collection_list = [str(collection.table_id) for collection in collections]
            else:
                collection_list = []
                for collection in collections:
                    if collection.table_id == collection_name:
                        collection_list.append(collection_name)
                    # NOTE(review): re.match only anchors at the start, so the
                    # tag 'a' also selects a partition tagged 'abc' - confirm
                    # that prefix matching is intended.
                    elif any(re.match(tag, collection.partition_tag) for tag in partition_tags):
                        collection_list.append(collection.table_id)

            files = []
            # Skip the query when no partition matched: an IN over an empty
            # list cannot select any row.
            if collection_list:
                file_type_cond = or_(
                    TableFiles.file_type == TableFiles.FILE_TYPE_RAW,
                    TableFiles.file_type == TableFiles.FILE_TYPE_TO_INDEX,
                    TableFiles.file_type == TableFiles.FILE_TYPE_INDEX,
                )
                file_cond = and_(file_type_cond, TableFiles.table_id.in_(collection_list))
                try:
                    files = db.Session.query(TableFiles).filter(file_cond).all()
                except sqlalchemy_exc.SQLAlchemyError as e:
                    raise exceptions.DBError(message=str(e), metadata=metadata) from e
        finally:
            # Release the session on every path, including the error paths
            # above, which previously returned without removing it.
            db.remove_session()

        if not files:
            # An empty file list is only reported, not treated as an error.
            logger.warning("Collection file is empty. {}".format(collection_list))

        servers = self.readonly_topo.group_names
        logger.info('Available servers: {}'.format(list(servers)))

        ring = HashRing(servers)

        routing = {}
        for f in files:
            file_id = str(f.id)
            routing.setdefault(ring.get_node(file_id), []).append(file_id)

        return routing
Esempio n. 4
0
    def _route(self,
               collection_name,
               range_array,
               partition_tags=None,
               metadata=None,
               **kwargs):
        """Route the searchable segment files of a collection to readonly nodes.

        The router filters out segment files whose status makes them
        unsuitable for searching, so the readonly nodes do not filter files
        themselves. The writable node may update segment file status; in a
        mishards cluster the metadata is used to keep that status consistent.

        Args:
            collection_name: Collection id to route.
            range_array: Accepted but not used by this method.
            partition_tags: Optional collection of tag patterns (regular
                expressions) selecting partitions; ``'_default'`` selects the
                collection row itself.
            metadata: Forwarded to the exceptions raised by this method.
            **kwargs: Accepted but not used here.

        Returns:
            dict mapping each node to a tuple ``(search_files, ud_files)``,
            where ``search_files`` is the list of file ids (as ``str``)
            assigned to the node and ``ud_files`` is whatever
            ``filter_file_to_update`` returns for that node's
            ``(file id, updated_time)`` pairs.

        Raises:
            exceptions.DBError: A metadata query raised a SQLAlchemy error.
            exceptions.CollectionNotFoundError: The collection query returned
                no rows.
        """
        # PXU TODO: Implement Thread-local Context
        # PXU TODO: Session life mgt

        # Select all available partitions from metadata.
        if not partition_tags:
            cond = and_(
                or_(Tables.table_id == collection_name,
                    Tables.owner_table == collection_name),
                Tables.state != Tables.TO_DELETE)
        else:
            # TODO: collection default partition is '_default'
            # Tags are not filtered in SQL; they are matched with re.match
            # further down.
            cond = and_(Tables.state != Tables.TO_DELETE,
                        Tables.owner_table == collection_name)
            if '_default' in partition_tags:
                # There is no partition entity for the default partition in
                # the meta table; it corresponds to the collection entity.
                default_par_cond = and_(Tables.table_id == collection_name,
                                        Tables.state != Tables.TO_DELETE)
                cond = or_(cond, default_par_cond)

        try:
            try:
                collections = db.Session.query(Tables).filter(cond).all()
            except sqlalchemy_exc.SQLAlchemyError as e:
                raise exceptions.DBError(message=str(e),
                                         metadata=metadata) from e

            if not collections:
                logger.error(
                    "Cannot find collection {} / {} in metadata during routing. Meta url: {}"
                    .format(collection_name, partition_tags, db.url))
                raise exceptions.CollectionNotFoundError(
                    "{}:{} not found in metadata".format(collection_name,
                                                         partition_tags),
                    metadata=metadata)

            if not partition_tags:
                collection_list = [
                    str(collection.table_id) for collection in collections
                ]
            else:
                collection_list = []
                for collection in collections:
                    if collection.table_id == collection_name:
                        collection_list.append(collection_name)
                    # Tags are treated as regular expressions.
                    # NOTE(review): re.match only anchors at the start, so the
                    # tag 'a' also selects a partition tagged 'abc' - confirm
                    # that prefix matching is intended.
                    elif any(
                            re.match(tag, collection.partition_tag)
                            for tag in partition_tags):
                        collection_list.append(collection.table_id)

            files = []
            # Skip the query when no partition matched: an IN over an empty
            # list cannot select any row.
            if collection_list:
                file_type_cond = or_(
                    TableFiles.file_type == TableFiles.FILE_TYPE_RAW,
                    TableFiles.file_type == TableFiles.FILE_TYPE_TO_INDEX,
                    TableFiles.file_type == TableFiles.FILE_TYPE_INDEX,
                )
                file_cond = and_(file_type_cond,
                                 TableFiles.table_id.in_(collection_list))
                try:
                    files = db.Session.query(TableFiles).filter(
                        file_cond).all()
                except sqlalchemy_exc.SQLAlchemyError as e:
                    raise exceptions.DBError(message=str(e),
                                             metadata=metadata) from e
        finally:
            # Release the session on every path, including the error paths
            # above, which previously returned without removing it.
            db.remove_session()

        if not files:
            # An empty file list is only reported, not treated as an error.
            logger.warning(
                "Collection file is empty. {}".format(collection_list))

        # Distribute segment files over the readonly nodes with a hash ring:
        # the nodes are the readonly groups, the keys are the file ids.
        servers = self.readonly_topo.group_names
        logger.info('Available servers: {}'.format(list(servers)))

        ring = HashRing(servers)

        routing = {}
        for f in files:
            file_id = str(f.id)
            routing.setdefault(ring.get_node(file_id), []).append(
                (file_id, int(f.updated_time)))

        # Work out, per node, which files need to be updated.
        filter_routing = {}
        for host, host_files in routing.items():
            ud_files = filter_file_to_update(host, host_files)
            search_files = [file_id for file_id, _ in host_files]
            filter_routing[host] = (search_files, ud_files)

        return filter_routing