Ejemplo n.º 1
0
class NalaSingleModelTagger(Tagger):
    """Tagger backed by a single ``PyCRFSuite`` model file.

    Tagging runs up to three steps in order: the features pipeline
    (optional), the model's ``annotate`` call, and a ``PostProcessing``
    pass (optional).
    """

    def __init__(self,
                 class_id=MUT_CLASS_ID,
                 bin_model=pkg_resources.resource_filename(
                     'nala.data', 'all3_model'),
                 features_pipeline=None,
                 execute_pipeline=True,
                 execute_pp=True,
                 keep_silent=True,
                 keep_genetic_markers=True,
                 keep_unnumbered=True,
                 keep_rs_ids=True):
        """Load the model and prepare the optional processing steps.

        Args:
            class_id: Class identifier used by default when tagging; also
                passed (wrapped in a list) to the ``Tagger`` base class.
            bin_model: Path of the model file handed to ``PyCRFSuite``.
                The default path is resolved once, when the class is
                defined.
            features_pipeline: Pipeline object exposing ``execute(dataset)``.
                Any falsy value is replaced by the result of
                ``get_prepare_pipeline_for_best_model()``.
            execute_pipeline: When false, ``tag`` skips the features
                pipeline.
            execute_pp: When false, no ``PostProcessing`` object is created
                and ``tag`` skips post-processing.
            keep_silent: Forwarded unchanged to ``PostProcessing``.
            keep_genetic_markers: Forwarded unchanged to ``PostProcessing``.
            keep_unnumbered: Forwarded unchanged to ``PostProcessing``.
            keep_rs_ids: Forwarded unchanged to ``PostProcessing``.
        """
        super().__init__([class_id])

        self.class_id = class_id
        self.bin_model = bin_model

        # Fall back to the default pipeline for any falsy value, not only None.
        if not features_pipeline:
            features_pipeline = get_prepare_pipeline_for_best_model()
        self.features_pipeline = features_pipeline
        self.execute_pipeline = execute_pipeline

        self.crf = PyCRFSuite(model_file=self.bin_model)

        # ``self.post`` stays None when post-processing is disabled.
        self.post = PostProcessing(
            keep_silent=keep_silent,
            keep_genetic_markers=keep_genetic_markers,
            keep_unnumbered=keep_unnumbered,
            keep_rs_ids=keep_rs_ids) if execute_pp else None

    def tag(self, dataset, class_id=None):
        """Annotate ``dataset`` with the model.

        Args:
            dataset: Object passed to the pipeline, the model and the
                post-processor; its type is defined outside this block.
            class_id: Class identifier to annotate with; defaults to the one
                given at construction time.
        """
        if class_id is None:
            class_id = self.class_id

        if self.execute_pipeline:
            self.features_pipeline.execute(dataset)

        self.crf.annotate(dataset, class_id)

        if not self.post:
            return
        self.post.process(dataset, class_id=class_id)
Ejemplo n.º 2
0
class NalaMultipleModelTagger(Tagger):
    """Tagger that combines two ``NalaSingleModelTagger`` instances.

    The two single-model taggers (one per model file) are wrapped in a
    ``MultipleModelTagger``. An optional ``PostProcessing`` pass runs
    after the combined tagger.
    """

    def __init__(
            self,
            class_id=MUT_CLASS_ID,
            st_model=pkg_resources.resource_filename('nala.data', 'st_model'),
            all3_model=pkg_resources.resource_filename('nala.data',
                                                       'all3_model'),
            features_pipeline=None,
            execute_pp=True,
            keep_silent=True,
            keep_genetic_markers=True,
            keep_unnumbered=True,
            keep_rs_ids=True):
        """Build the two underlying taggers and the optional post-processor.

        Args:
            class_id: Class identifier used by default when tagging; also
                passed (wrapped in a list) to the ``Tagger`` base class.
            st_model: Path of the model file for the first tagger. The
                default path is resolved once, when the class is defined.
            all3_model: Path of the model file for the second tagger. The
                default path is resolved once, when the class is defined.
            features_pipeline: Pipeline given to the first tagger; the
                second tagger reuses the first tagger's pipeline object.
            execute_pp: When false, no ``PostProcessing`` object is created
                and ``tag`` skips post-processing.
            keep_silent: Forwarded unchanged to ``PostProcessing``.
            keep_genetic_markers: Forwarded unchanged to ``PostProcessing``.
            keep_unnumbered: Forwarded unchanged to ``PostProcessing``.
            keep_rs_ids: Forwarded unchanged to ``PostProcessing``.
        """
        super().__init__([class_id])

        # ``tag`` falls back to ``self.class_id`` when called without an
        # explicit class id, so the attribute must be set here.
        self.class_id = class_id

        # The second tagger shares the first one's pipeline and is built
        # with ``execute_pipeline=False`` so it does not execute it itself.
        # NOTE(review): both inner taggers keep the default
        # ``execute_pp=True`` and therefore own a PostProcessing object
        # with default flags; whether that is intended depends on how
        # MultipleModelTagger drives them -- confirm.
        tagger1 = NalaSingleModelTagger(class_id, st_model, features_pipeline)
        tagger2 = NalaSingleModelTagger(class_id,
                                        all3_model,
                                        tagger1.features_pipeline,
                                        execute_pipeline=False)
        self.tagger = MultipleModelTagger(tagger1, tagger2, [class_id])

        # ``self.post`` stays None when post-processing is disabled.
        self.post = None
        if execute_pp:
            self.post = PostProcessing(
                keep_silent=keep_silent,
                keep_genetic_markers=keep_genetic_markers,
                keep_unnumbered=keep_unnumbered,
                keep_rs_ids=keep_rs_ids)

    def tag(self, dataset, class_id=None):
        """Tag ``dataset`` with the combined tagger, then post-process.

        Args:
            dataset: Object passed to the combined tagger and the
                post-processor; its type is defined outside this block.
            class_id: Class identifier to tag with; defaults to the one
                given at construction time.
        """
        class_id = self.class_id if class_id is None else class_id
        self.tagger.tag(dataset, class_id)
        if self.post:
            self.post.process(dataset, class_id=class_id)