class NalaSingleModelTagger(Tagger):
    """Tagger backed by a single CRF model file.

    Tagging runs, in order: the feature pipeline (optional), the CRF
    annotation, and a post-processing pass (optional).
    """

    def __init__(self,
                 class_id=MUT_CLASS_ID,
                 bin_model=pkg_resources.resource_filename(
                     'nala.data', 'all3_model'),
                 features_pipeline=None,
                 execute_pipeline=True,
                 execute_pp=True,
                 keep_silent=True,
                 keep_genetic_markers=True,
                 keep_unnumbered=True,
                 keep_rs_ids=True):
        """Build the CRF wrapper and, if requested, the post-processor.

        Args:
            class_id: Class identifier this tagger predicts; also the
                default used by ``tag`` when none is given.
            bin_model: Path of the CRF model file handed to ``PyCRFSuite``.
                The default path is resolved once, when the class is defined.
            features_pipeline: Feature pipeline to run before annotating.
                A falsy value selects
                ``get_prepare_pipeline_for_best_model()``.
            execute_pipeline: Whether ``tag`` runs the feature pipeline.
            execute_pp: Whether a ``PostProcessing`` step is created and run.
            keep_silent: Forwarded unchanged to ``PostProcessing``.
            keep_genetic_markers: Forwarded unchanged to ``PostProcessing``.
            keep_unnumbered: Forwarded unchanged to ``PostProcessing``.
            keep_rs_ids: Forwarded unchanged to ``PostProcessing``.
        """
        super().__init__([class_id])

        self.class_id = class_id
        self.bin_model = bin_model
        # Truthiness (not an ``is None`` test) decides the fallback.
        self.features_pipeline = (
            features_pipeline or get_prepare_pipeline_for_best_model()
        )
        self.execute_pipeline = execute_pipeline

        self.crf = PyCRFSuite(model_file=self.bin_model)

        # ``post`` stays None when post-processing is switched off.
        self.post = (
            PostProcessing(
                keep_silent=keep_silent,
                keep_genetic_markers=keep_genetic_markers,
                keep_unnumbered=keep_unnumbered,
                keep_rs_ids=keep_rs_ids,
            )
            if execute_pp
            else None
        )

    def tag(self, dataset, class_id=None):
        """Annotate ``dataset`` in place with the CRF model.

        Args:
            dataset: Dataset passed to the feature pipeline, the CRF
                annotator and the post-processor.
            class_id: Class identifier to annotate with; ``None`` means
                the ``class_id`` given at construction time.
        """
        if class_id is None:
            class_id = self.class_id

        if self.execute_pipeline:
            self.features_pipeline.execute(dataset)

        self.crf.annotate(dataset, class_id)

        if not self.post:
            return
        self.post.process(dataset, class_id=class_id)
class NalaMultipleModelTagger(Tagger):
    """Tagger that combines two single-model taggers.

    Two ``NalaSingleModelTagger`` instances (one per model file) are wrapped
    in a ``MultipleModelTagger``; an optional post-processing pass runs
    after the combined tagging.
    """

    def __init__(
            self,
            class_id=MUT_CLASS_ID,
            st_model=pkg_resources.resource_filename('nala.data', 'st_model'),
            all3_model=pkg_resources.resource_filename('nala.data', 'all3_model'),
            features_pipeline=None,
            execute_pp=True,
            keep_silent=True,
            keep_genetic_markers=True,
            keep_unnumbered=True,
            keep_rs_ids=True):
        """Build both sub-taggers and, if requested, the post-processor.

        Args:
            class_id: Class identifier this tagger predicts; also the
                default used by ``tag`` when none is given.
            st_model: Path of the model file for the first sub-tagger.
            all3_model: Path of the model file for the second sub-tagger.
            features_pipeline: Feature pipeline given to the first
                sub-tagger; the second one reuses the first one's pipeline.
            execute_pp: Whether a ``PostProcessing`` step is created and run
                after the combined tagging.
            keep_silent: Forwarded unchanged to ``PostProcessing``.
            keep_genetic_markers: Forwarded unchanged to ``PostProcessing``.
            keep_unnumbered: Forwarded unchanged to ``PostProcessing``.
            keep_rs_ids: Forwarded unchanged to ``PostProcessing``.
        """
        super().__init__([class_id])

        # ``tag`` falls back to this attribute when called without an
        # explicit class_id, so it has to be stored here.
        self.class_id = class_id

        # NOTE(review): the sub-taggers are built without forwarding
        # execute_pp / keep_* and therefore use NalaSingleModelTagger's own
        # defaults for post-processing -- confirm this is intended.
        tagger1 = NalaSingleModelTagger(class_id, st_model, features_pipeline)
        # The second tagger shares the first one's pipeline object and is
        # told not to execute it again.
        tagger2 = NalaSingleModelTagger(
            class_id, all3_model, tagger1.features_pipeline,
            execute_pipeline=False)

        self.tagger = MultipleModelTagger(tagger1, tagger2, [class_id])

        # ---

        self.post = None
        if execute_pp:
            self.post = PostProcessing(
                keep_silent=keep_silent,
                keep_genetic_markers=keep_genetic_markers,
                keep_unnumbered=keep_unnumbered,
                keep_rs_ids=keep_rs_ids)

    def tag(self, dataset, class_id=None):
        """Annotate ``dataset`` in place using the combined tagger.

        Args:
            dataset: Dataset passed to the combined tagger and then to the
                post-processor.
            class_id: Class identifier to annotate with; ``None`` means
                the ``class_id`` given at construction time.
        """
        class_id = self.class_id if class_id is None else class_id

        self.tagger.tag(dataset, class_id)

        if self.post:
            self.post.process(dataset, class_id=class_id)