Python MatchEngine.create_match_tree Examples, matchengine.engine.MatchEngine.create_match_tree Python Examples

Example #1

0

Show file

    def test_extract_cancer_types(self):
        """
        Checks the dictionary returned by ParseMatchTree.extract_cancer_types
        for match_tree_example, match_tree_example2 and match_tree_example3.
        """

        def summarize(tree):
            # A fresh engine is built for every tree under test
            engine = MatchEngine(get_db())
            parser = ParseMatchTree(engine.create_match_tree(tree))
            return parser.extract_cancer_types()

        # First tree: a single diagnosis that expands to three cancer types
        summary = summarize(match_tree_example)

        diagnoses = summary['diagnoses']
        assert sorted(diagnoses) == sorted(['Ocular Melanoma']), diagnoses

        expanded = summary['cancer_types_expanded']
        assert sorted(expanded) == sorted(
            ['Ocular Melanoma', 'Uveal Melanoma', 'Conjunctival Melanoma']
        ), expanded

        excluded = summary['excluded_cancer_types']
        assert sorted(excluded) == [], excluded

        primary = summary['primary_cancer_types']
        assert primary == ['Eye'], primary

        # Second tree: '_SOLID_' must not expand to a leukemia
        summary = summarize(match_tree_example2)

        diagnoses = summary['diagnoses']
        assert sorted(diagnoses) == sorted(['_SOLID_']), diagnoses

        expanded = summary['cancer_types_expanded']
        assert 'Acute Lymphoid Leukemia' not in expanded, expanded

        excluded = summary['excluded_cancer_types']
        assert sorted(excluded) == [], excluded

        primary = summary['primary_cancer_types']
        assert primary == ['All Solid Tumors'], primary

        # Third tree: '_LIQUID_' must expand to include the leukemia
        summary = summarize(match_tree_example3)

        diagnoses = summary['diagnoses']
        assert sorted(diagnoses) == sorted(['_LIQUID_']), diagnoses

        expanded = summary['cancer_types_expanded']
        assert 'Acute Lymphoid Leukemia' in expanded, expanded

        excluded = summary['excluded_cancer_types']
        assert sorted(excluded) == [], excluded

        primary = summary['primary_cancer_types']
        assert primary == ['All Liquid Tumors'], primary

Example #2

0

Show file

    def test_extract_variants(self):
        """
        Checks the variant categories ('variants', 'wts', 'cnvs', 'svs',
        'exclusions') returned by ParseMatchTree.extract_variants for match
        trees 0, 1, 2 and 4 of the first step of the module-level trial.
        """

        engine = MatchEngine(get_db())
        matches = trial['treatment_list']['step'][0]['match']

        def variants_of(position):
            # Build the graph for one match tree and pull its variant summary
            graph = engine.create_match_tree(matches[position])
            return ParseMatchTree(graph).extract_variants()

        # Match tree 0: three variants and one wildtype
        found = variants_of(0)
        for entry in ('BRAF V600E', 'BRAF V600K', 'KRAS any'):
            assert entry in found['variants']
        assert 'EGFR wt' in found['wts']
        assert len(found['variants']) == 3
        assert len(found['wts']) == 1

        # Match tree 1: one CNV, one SV and one exclusion, nothing else
        found = variants_of(1)
        for category, entry in (('cnvs', 'PTEN CNV'),
                                ('svs', 'BRCA1 SV'),
                                ('exclusions', 'BRAF V600')):
            assert entry in found[category]
        for category, size in (('variants', 0),
                               ('cnvs', 1),
                               ('svs', 1),
                               ('wts', 0),
                               ('exclusions', 1)):
            assert len(found[category]) == size

        # Match tree 2: the variant is reported as an exclusion only
        found = variants_of(2)
        assert 'BRAF V600E' not in found['variants']
        assert len(found['variants']) == 0
        assert len(found['wts']) == 0
        assert 'BRAF V600E' in found['exclusions']

        # Match tree 4: every category is checked for content and size
        found = variants_of(4)
        expectations = (
            ('variants', ('BRAF V600K', 'EGFR any')),
            ('cnvs', ('PTEN CNV',)),
            ('exclusions', ('KRAS', 'NRAS')),
            ('wts', ('NTRK1 wt',)),
        )
        for category, entries in expectations:
            for entry in entries:
                assert entry in found[category]
            assert len(found[category]) == len(entries)

Example #3

0

Show file

File: test_trial.py Project: dfci/matchminer-api

    def test_extract_variants(self):
        """
        Checks the variant categories ('variants', 'wts', 'cnvs', 'svs',
        'exclusions') returned by ParseMatchTree.extract_variants for match
        trees 0, 1, 2 and 4 of the first step of the module-level trial.
        """

        engine = MatchEngine(get_db())
        matches = trial['treatment_list']['step'][0]['match']

        def variants_of(position):
            # Build the graph for one match tree and pull its variant summary
            graph = engine.create_match_tree(matches[position])
            return ParseMatchTree(graph).extract_variants()

        # Match tree 0: three variants and one wildtype
        found = variants_of(0)
        for entry in ('BRAF V600E', 'BRAF V600K', 'KRAS any'):
            assert entry in found['variants']
        assert 'EGFR wt' in found['wts']
        assert len(found['variants']) == 3
        assert len(found['wts']) == 1

        # Match tree 1: one CNV, one SV and one exclusion, nothing else
        found = variants_of(1)
        for category, entry in (('cnvs', 'PTEN CNV'),
                                ('svs', 'BRCA1 SV'),
                                ('exclusions', 'BRAF V600')):
            assert entry in found[category]
        for category, size in (('variants', 0),
                               ('cnvs', 1),
                               ('svs', 1),
                               ('wts', 0),
                               ('exclusions', 1)):
            assert len(found[category]) == size

        # Match tree 2: the variant is reported as an exclusion only
        found = variants_of(2)
        assert 'BRAF V600E' not in found['variants']
        assert len(found['variants']) == 0
        assert len(found['wts']) == 0
        assert 'BRAF V600E' in found['exclusions']

        # Match tree 4: every category is checked for content and size
        found = variants_of(4)
        expectations = (
            ('variants', ('BRAF V600K', 'EGFR any')),
            ('cnvs', ('PTEN CNV',)),
            ('exclusions', ('KRAS', 'NRAS')),
            ('wts', ('NTRK1 wt',)),
        )
        for category, entries in expectations:
            for entry in entries:
                assert entry in found[category]
            assert len(found[category]) == len(entries)

Example #4

0

Show file

File: test_trial.py Project: dfci/matchminer-api

    def test_extract_hr_status(self):
        """
        Match tree 5 of the first step must yield the HER2 Negative,
        ER Negative and PR Positive statuses, in any order.
        """

        engine = MatchEngine(get_db())
        criteria = trial['treatment_list']['step'][0]['match'][5]
        parser = ParseMatchTree(engine.create_match_tree(criteria))
        statuses = parser.extract_hr_status()

        # Order is irrelevant, so both sides are sorted before comparing
        expected = ['HER2 Negative', 'ER Negative', 'PR Positive']
        assert sorted(statuses) == sorted(expected)

Example #5

0

Show file

    def test_extract_hr_status(self):
        """
        Match tree 5 of the first step must yield the HER2 Negative,
        ER Negative and PR Positive statuses, in any order.
        """

        engine = MatchEngine(get_db())
        criteria = trial['treatment_list']['step'][0]['match'][5]
        parser = ParseMatchTree(engine.create_match_tree(criteria))
        statuses = parser.extract_hr_status()

        # Order is irrelevant, so both sides are sorted before comparing
        expected = ['HER2 Negative', 'ER Negative', 'PR Positive']
        assert sorted(statuses) == sorted(expected)

Example #6

0

Show file

    def test_extract_signatures(self):
        """
        Match tree 3 of the first step must report 'MMR-D' in the first element
        returned by extract_signatures and 'MSI-H' in the second.
        """

        engine = MatchEngine(get_db())
        criteria = trial['treatment_list']['step'][0]['match'][3]
        parser = ParseMatchTree(engine.create_match_tree(criteria))
        signatures = parser.extract_signatures()

        # Elements are read by index; the result is never unpacked
        first = signatures[0]
        assert 'MMR-D' in first
        second = signatures[1]
        assert 'MSI-H' in second

Example #7

0

Show file

    def test_extract_genes(self):
        """
        Checks ParseMatchTree.extract_genes on match trees 0 and 2 of the first
        step: tree 0 must yield exactly BRAF and KRAS, tree 2 must not yield BRAF.
        """

        engine = MatchEngine(get_db())
        matches = trial['treatment_list']['step'][0]['match']

        def genes_of(position):
            # Build the graph for one match tree and list its genes
            graph = engine.create_match_tree(matches[position])
            return ParseMatchTree(graph).extract_genes()

        genes = genes_of(0)
        for symbol in ('BRAF', 'KRAS'):
            assert symbol in genes, genes
        assert 'EGFR' not in genes
        assert 'test' not in genes, genes
        assert len(genes) == 2, genes

        genes = genes_of(2)
        assert 'BRAF' not in genes, genes

Example #8

0

Show file

File: test_trial.py Project: dfci/matchminer-api

    def test_extract_signatures(self):
        """
        Match tree 3 of the first step must report 'MMR-D' in the first element
        returned by extract_signatures and 'MSI-H' in the second.
        """

        engine = MatchEngine(get_db())
        criteria = trial['treatment_list']['step'][0]['match'][3]
        parser = ParseMatchTree(engine.create_match_tree(criteria))
        signatures = parser.extract_signatures()

        # Elements are read by index; the result is never unpacked
        first = signatures[0]
        assert 'MMR-D' in first
        second = signatures[1]
        assert 'MSI-H' in second

Example #9

0

Show file

File: test_trial.py Project: dfci/matchminer-api

    def test_extract_cancer_types(self):
        """
        Checks the dictionary returned by ParseMatchTree.extract_cancer_types
        for match_tree_example, match_tree_example2 and match_tree_example3.
        """

        def summarize(tree):
            # A fresh engine is built for every tree under test
            engine = MatchEngine(get_db())
            parser = ParseMatchTree(engine.create_match_tree(tree))
            return parser.extract_cancer_types()

        # First tree: a single diagnosis that expands to three cancer types
        summary = summarize(match_tree_example)

        diagnoses = summary['diagnoses']
        assert sorted(diagnoses) == sorted(['Ocular Melanoma']), diagnoses

        expanded = summary['cancer_types_expanded']
        assert sorted(expanded) == sorted(
            ['Ocular Melanoma', 'Uveal Melanoma', 'Conjunctival Melanoma']
        ), expanded

        excluded = summary['excluded_cancer_types']
        assert sorted(excluded) == [], excluded

        primary = summary['primary_cancer_types']
        assert primary == ['Eye'], primary

        # Second tree: '_SOLID_' must not expand to a leukemia
        summary = summarize(match_tree_example2)

        diagnoses = summary['diagnoses']
        assert sorted(diagnoses) == sorted(['_SOLID_']), diagnoses

        expanded = summary['cancer_types_expanded']
        assert 'Acute Lymphoid Leukemia' not in expanded, expanded

        excluded = summary['excluded_cancer_types']
        assert sorted(excluded) == [], excluded

        primary = summary['primary_cancer_types']
        assert primary == ['All Solid Tumors'], primary

        # Third tree: '_LIQUID_' must expand to include the leukemia
        summary = summarize(match_tree_example3)

        diagnoses = summary['diagnoses']
        assert sorted(diagnoses) == sorted(['_LIQUID_']), diagnoses

        expanded = summary['cancer_types_expanded']
        assert 'Acute Lymphoid Leukemia' in expanded, expanded

        excluded = summary['excluded_cancer_types']
        assert sorted(excluded) == [], excluded

        primary = summary['primary_cancer_types']
        assert primary == ['All Liquid Tumors'], primary

Example #10

0

Show file

File: test_trial.py Project: dfci/matchminer-api

    def test_extract_genes(self):
        """
        Checks ParseMatchTree.extract_genes on match trees 0 and 2 of the first
        step: tree 0 must yield exactly BRAF and KRAS, tree 2 must not yield BRAF.
        """

        engine = MatchEngine(get_db())
        matches = trial['treatment_list']['step'][0]['match']

        def genes_of(position):
            # Build the graph for one match tree and list its genes
            graph = engine.create_match_tree(matches[position])
            return ParseMatchTree(graph).extract_genes()

        genes = genes_of(0)
        for symbol in ('BRAF', 'KRAS'):
            assert symbol in genes, genes
        assert 'EGFR' not in genes
        assert 'test' not in genes, genes
        assert len(genes) == 2, genes

        genes = genes_of(2)
        assert 'BRAF' not in genes, genes

Example #11

0

Show file

    def _get_signatures(self, item):
        """
        Fills the hormone receptor status and mutational signature summary lists.

        The first entry of every ``match`` list found on a step, on its arms and
        on their dose levels is parsed; the results are appended to ``self.mmr``,
        ``self.ms``, ``self.sigs`` and ``self.hr``.

        :param item: Trial document
        """

        engine = MatchEngine(get_db())

        def harvest(node):
            # Nodes without match criteria contribute nothing
            if 'match' not in node:
                return

            parser = ParseMatchTree(engine.create_match_tree(node['match'][0]))
            found = parser.extract_signatures()
            self.mmr.extend(found[0])
            self.ms.extend(found[1])
            self.sigs.extend(found[2])
            self.hr.extend(parser.extract_hr_status())

        # Visit order: the step itself, then each arm followed by its dose levels
        for step in item['treatment_list']['step']:
            harvest(step)
            if 'arm' not in step:
                continue

            for arm in step['arm']:
                harvest(arm)
                if 'dose_level' not in arm:
                    continue

                for dose in arm['dose_level']:
                    harvest(dose)

Example #12

0

Show file

File: trial_search.py Project: dfci/matchminer-api

    def _get_signatures(self, item):
        """
        Fills the hormone receptor status and mutational signature summary lists.

        The first entry of every ``match`` list found on a step, on its arms and
        on their dose levels is parsed; the results are appended to ``self.mmr``,
        ``self.ms`` and ``self.hr``.

        :param item: Trial document
        """

        engine = MatchEngine(get_db())

        def harvest(node):
            # Nodes without match criteria contribute nothing
            if 'match' not in node:
                return

            parser = ParseMatchTree(engine.create_match_tree(node['match'][0]))
            found = parser.extract_signatures()
            self.mmr.extend(found[0])
            self.ms.extend(found[1])
            self.hr.extend(parser.extract_hr_status())

        # Visit order: the step itself, then each arm followed by its dose levels
        for step in item['treatment_list']['step']:
            harvest(step)
            if 'arm' not in step:
                continue

            for arm in step['arm']:
                harvest(arm)
                if 'dose_level' not in arm:
                    continue

                for dose in arm['dose_level']:
                    harvest(dose)

Example #13

0

Show file

File: trial_search.py Project: oncokb/matchminer-api

class Autocomplete:
    """
    Builds the autocomplete suggestion and search data of a single trial
    for ElasticSearch.
    """

    def __init__(self, item):
        """
        Creates data for ElasticSearch's autocomplete index

        :param item: Trial info:
                    - treatment_list: Nested dictionary containing all match criteria
                    - _summary:       Summary object created by the API
        """
        self.summary = item['_summary']
        self.treatment_list = item['treatment_list']

        # Variant text accumulated over every parsed match tree, keyed by category
        self.vdict = {
            'variants': [],
            'wts': [],
            'svs': [],
            'cnvs': [],
            'exclusions': []
        }
        self.genes = []
        # Stays None until at least one match tree has been parsed
        self.cancer_type_dict = None
        self.m = MatchEngine(get_db())

    @staticmethod
    def _get_cancer_type_weight(cancer_type, hierarchy='default'):
        """
        Sets the weights for ElasticSearch autocompletion on cancer types. Cancer type terms
        are split so that autocomplete suggestions will populate regardless of which word in the
        multi-word cancer type string is initially input. Higher weighted terms will populate the
        top of the autocomplete dropdown list.

        :param cancer_type: Text to display in the autocomplete dropdown list.
        :param hierarchy: Key into the weight table: 'primary', 'default' or 'bucket'.
                          Ignored for 'All Solid Tumors' and 'All Liquid Tumors', which
                          always receive the 'bucket' weight.
        :return: Dictionary with 'input', 'output' and 'weight' keys.
        :raises KeyError: If hierarchy is not one of the known keys.
        """

        weight_dict = {'primary': 10, 'default': 5, 'bucket': 20}
        if cancer_type == 'All Solid Tumors' or cancer_type == 'All Liquid Tumors':
            hierarchy = 'bucket'

        return {
            # Full text plus each of its words longer than three characters, deduplicated
            'input': list(
                set([cancer_type] +
                    [i for i in cancer_type.split() if len(i) > 3])),
            'output': cancer_type,
            'weight': weight_dict[hierarchy]
        }

    @staticmethod
    def _get_variants_weight(variant, esrule='variants'):
        """
        Sets the weights for ElasticSearch autocompletion on gene variants. Higher weighted terms will populate the
        top of the autocomplete dropdown list.

        :param variant: Text to display in the autocomplete dropdown list.
        :param esrule: Type of variant: 'variants', 'wts', 'svs' or 'cnvs'.
        :return: Dictionary with 'input' and 'weight' keys.
        :raises KeyError: If esrule is not one of the known types.
        """

        weight_dict = {'variants': 1, 'wts': 5, 'svs': 3, 'cnvs': 3}
        return {'input': variant, 'weight': weight_dict[esrule]}

    @staticmethod
    def _get_investigator_suggest(investigator, dfci_investigator):
        """
        Creates a list of investigators from the _summary field of the trial collection

        :param investigator: Investigator name. When it contains a comma, the first two
                             comma-separated parts are swapped in the output
                             (presumably "Last, First" becomes "First Last").
        :param dfci_investigator: None, or a dictionary that may hold 'first_name'
                                  and 'last_name'.
        :return: List with one suggestion for the investigator, plus a second one for the
                 DFCI investigator when its name is non-empty and differs from the first.
        """

        iin = []
        iout = ''
        ispl = [i.strip() for i in investigator.split(',')]
        if len(ispl) == 1:
            iin = [ispl[0]]
            iout = investigator
        elif len(ispl) >= 2:
            iin = [ispl[0], ispl[1]]
            iout = '%s %s' % (ispl[1], ispl[0])

        dfci_in = []
        dfci_out = ''
        if dfci_investigator is not None and 'first_name' in dfci_investigator:
            dfci_in.append(dfci_investigator['first_name'].strip())
            dfci_out += dfci_investigator['first_name'].strip()
        if dfci_investigator is not None and 'last_name' in dfci_investigator:
            dfci_in.append(dfci_investigator['last_name'].strip())
            dfci_out += ' %s' % dfci_investigator['last_name'].strip()

        # Empty name parts are not offered as autocomplete input
        inv_suggest = [{'input': [i for i in iin if i != ''], 'output': iout}]
        if dfci_out != iout and dfci_out != '':
            inv_suggest.append({'input': dfci_in, 'output': dfci_out.strip()})

        return inv_suggest

    @staticmethod
    def _get_tumor_types_search(ct_suggest):
        """
        Maps special cancer type text output to the values stored in the ElasticSearch index.

        :param ct_suggest: List of cancer type suggestion dictionaries, each with an 'output' key.
        :return: List of cancer type text stored in the ElasticSearch index, which we will query.
        """

        tts = []
        for ct in ct_suggest:
            if 'output' in ct and ct['output'] == 'All Solid Tumors':
                tts.append('_SOLID_')
            elif 'output' in ct and ct['output'] == 'All Liquid Tumors':
                tts.append('_LIQUID_')
            else:
                tts.append(ct['output'])

        return tts

    def _extract_data_from_match(self, match):
        """
        Extract Cancer Type, Gene, and Variant data from the given match tree

        :param match: Match criteria passed to MatchEngine.create_match_tree.
        """

        g = self.m.create_match_tree(match)
        pmt = ParseMatchTree(g)
        # NOTE(review): this replaces the cancer types found in earlier match trees,
        # while genes and variants accumulate - confirm this is intended.
        self.cancer_type_dict = pmt.extract_cancer_types()
        self.genes.extend(pmt.extract_genes())
        vdict_tmp = pmt.extract_variants()
        # items() exists on both Python 2 and Python 3; iteritems() is Python 2 only
        for k, v in self.vdict.items():
            v.extend(vdict_tmp[k])

    def add_autocomplete(self):
        """
        Walks the steps, arms and dose levels of the treatment list, parses the first
        match tree of each, and builds the autocomplete and search data of the trial.

        :return: Tuple of (suggestors, searchers, primary cancer types), where the last
                 element is the result of parse_primary_cancer_types.
        """

        for step in self.treatment_list['step']:
            if 'match' in step:
                self._extract_data_from_match(step['match'][0])

            if 'arm' in step:
                for arm in step['arm']:
                    if 'match' in arm:
                        self._extract_data_from_match(arm['match'][0])

                    if 'dose_level' in arm:
                        for dose in arm['dose_level']:
                            if 'match' in dose:
                                self._extract_data_from_match(dose['match'][0])

        # No match tree was found anywhere: fall back to an empty summary
        if self.cancer_type_dict is None:
            self.cancer_type_dict = {
                'diagnoses': [],
                'primary_cancer_types': [],
                'cancer_types_expanded': [],
                'excluded_cancer_types': []
            }

        weighted_cancer_types = []
        for ct in self.cancer_type_dict['primary_cancer_types']:
            suggestion = self._get_cancer_type_weight(ct, hierarchy='primary')
            weighted_cancer_types.append(suggestion)

        # Expanded types that are also primary types were already added above
        for ct in set(self.cancer_type_dict['cancer_types_expanded']) - set(
                self.cancer_type_dict['primary_cancer_types']):
            suggestion = self._get_cancer_type_weight(ct, hierarchy='default')
            weighted_cancer_types.append(suggestion)

        weighted_variants = {}
        for key in ['variants', 'cnvs', 'svs', 'wts']:
            weighted_variants[key] = []
            for v in set(self.vdict[key]):
                suggestion = self._get_variants_weight(v, esrule=key)
                weighted_variants[key].append(suggestion)

        suggestors = {
            "cancer_type_suggest": weighted_cancer_types,
            "hugo_symbol_suggest": {
                "input": list(set(self.genes))
            },
            # Entries ending in 'any' are searchable but not suggested
            "variant_suggest": [
                i for i in weighted_variants['variants']
                if not i['input'].endswith('any')
            ],
            "wildtype_suggest": weighted_variants['wts'],
            "cnv_suggest": weighted_variants['cnvs'],
            "sv_suggest": weighted_variants['svs'],
            "protocol_no_suggest": {
                'input': self.summary['protocol_number']
            },
            "disease_center_suggest": {
                'input': [
                    i.replace('(', '').replace(')', '')
                    for i in self.summary['disease_center'].split()
                ],
                'output': self.summary['disease_center']
            },
            'disease_status_suggest': {
                'input': self.summary['disease_status']
            },
            'drug_suggest': {
                'input': [i.title() for i in self.summary['drugs']]
            },
            'investigator_suggest':
            self._get_investigator_suggest(self.summary['investigator'],
                                           self.summary['dfci_investigator']),
            'mmr_status_suggest': {
                'input': self.summary['mmr_status'] + self.summary['ms_status']
            },
            'nct_number_suggest': {
                'input': self.summary['nct_number']
            }
        }

        searchers = {
            "tumor_types":
            list(set(self._get_tumor_types_search(weighted_cancer_types))),
            "genes": list(set(self.genes)),
            "variants":
            list(set([i['input'] for i in weighted_variants['variants']])),
            "wildtype_genes":
            list(set([i['input'] for i in weighted_variants['wts']])),
            "cnv_genes":
            list(set([i['input'] for i in weighted_variants['cnvs']])),
            "sv_genes":
            list(set([i['input'] for i in weighted_variants['svs']])),
            "exclusion_genes": list(set(self.vdict['exclusions'])),
            "protocol_no": self.summary["protocol_number"],
            "drugs": self.summary["drugs"],
            "age": self.summary["age_summary"],
            "phase": self.summary["phase_summary"],
            "disease_status": self.summary["disease_status"],
            "nct_number": self.summary["nct_number"],
            "disease_center": self.summary["disease_center"],
            "mmr_status": self.summary["mmr_status"],
            "ms_status": self.summary["ms_status"],
            "mutational_signatures": self.summary["mutational_signatures"],
            "investigator":
            [i['output'] for i in suggestors['investigator_suggest']],
            "short_title": self.summary["short_title"]
        }

        return suggestors, searchers, parse_primary_cancer_types(
            self.cancer_type_dict['primary_cancer_types'])

Example #14

0

Show file

File: trial_search.py Project: dfci/matchminer-api

class Autocomplete:
    """
    Builds the autocomplete suggestion and search data of a single trial
    for ElasticSearch.
    """

    def __init__(self, item):
        """
        Creates data for ElasticSearch's autocomplete index

        :param item: Trial info:
                    - treatment_list: Nested dictionary containing all match criteria
                    - _summary:       Summary object created by the API
        """
        self.summary = item['_summary']
        self.treatment_list = item['treatment_list']

        # Variant text accumulated over every parsed match tree, keyed by category
        self.vdict = {
            'variants': [],
            'wts': [],
            'svs': [],
            'cnvs': [],
            'exclusions': []
        }
        self.genes = []
        # Stays None until at least one match tree has been parsed
        self.cancer_type_dict = None
        self.m = MatchEngine(get_db())

    @staticmethod
    def _get_cancer_type_weight(cancer_type, hierarchy='default'):
        """
        Sets the weights for ElasticSearch autocompletion on cancer types. Cancer type terms
        are split so that autocomplete suggestions will populate regardless of which word in the
        multi-word cancer type string is initially input. Higher weighted terms will populate the
        top of the autocomplete dropdown list.

        :param cancer_type: Text to display in the autocomplete dropdown list.
        :param hierarchy: Key into the weight table: 'primary', 'default' or 'bucket'.
                          Ignored for 'All Solid Tumors' and 'All Liquid Tumors', which
                          always receive the 'bucket' weight.
        :return: Dictionary with 'input', 'output' and 'weight' keys.
        :raises KeyError: If hierarchy is not one of the known keys.
        """

        weight_dict = {'primary': 10, 'default': 5, 'bucket': 20}
        if cancer_type == 'All Solid Tumors' or cancer_type == 'All Liquid Tumors':
            hierarchy = 'bucket'

        return {
            # Full text plus each of its words longer than three characters, deduplicated
            'input': list(set([cancer_type] + [i for i in cancer_type.split() if len(i) > 3])),
            'output': cancer_type,
            'weight': weight_dict[hierarchy]
        }

    @staticmethod
    def _get_variants_weight(variant, esrule='variants'):
        """
        Sets the weights for ElasticSearch autocompletion on gene variants. Higher weighted terms will populate the
        top of the autocomplete dropdown list.

        :param variant: Text to display in the autocomplete dropdown list.
        :param esrule: Type of variant: 'variants', 'wts', 'svs' or 'cnvs'.
        :return: Dictionary with 'input' and 'weight' keys.
        :raises KeyError: If esrule is not one of the known types.
        """

        weight_dict = {
            'variants': 1,
            'wts': 5,
            'svs': 3,
            'cnvs': 3
        }
        return {'input': variant, 'weight': weight_dict[esrule]}

    @staticmethod
    def _get_investigator_suggest(investigator, dfci_investigator):
        """
        Creates a list of investigators from the _summary field of the trial collection

        :param investigator: Investigator name. When it contains a comma, the first two
                             comma-separated parts are swapped in the output
                             (presumably "Last, First" becomes "First Last").
        :param dfci_investigator: None, or a dictionary that may hold 'first_name'
                                  and 'last_name'.
        :return: List with one suggestion for the investigator, plus a second one for the
                 DFCI investigator when its name is non-empty and differs from the first.
        """

        iin = []
        iout = ''
        ispl = [i.strip() for i in investigator.split(',')]
        if len(ispl) == 1:
            iin = [ispl[0]]
            iout = investigator
        elif len(ispl) >= 2:
            iin = [ispl[0], ispl[1]]
            iout = '%s %s' % (ispl[1], ispl[0])

        dfci_in = []
        dfci_out = ''
        if dfci_investigator is not None and 'first_name' in dfci_investigator:
            dfci_in.append(dfci_investigator['first_name'].strip())
            dfci_out += dfci_investigator['first_name'].strip()
        if dfci_investigator is not None and 'last_name' in dfci_investigator:
            dfci_in.append(dfci_investigator['last_name'].strip())
            dfci_out += ' %s' % dfci_investigator['last_name'].strip()

        # Empty name parts are not offered as autocomplete input
        inv_suggest = [{
            'input': [i for i in iin if i != ''],
            'output': iout
        }]
        if dfci_out != iout and dfci_out != '':
            inv_suggest.append({
                'input': dfci_in,
                'output': dfci_out.strip()
            })

        return inv_suggest

    @staticmethod
    def _get_tumor_types_search(ct_suggest):
        """
        Maps special cancer type text output to the values stored in the ElasticSearch index.

        :param ct_suggest: List of cancer type suggestion dictionaries, each with an 'output' key.
        :return: List of cancer type text stored in the ElasticSearch index, which we will query.
        """

        tts = []
        for ct in ct_suggest:
            if 'output' in ct and ct['output'] == 'All Solid Tumors':
                tts.append('_SOLID_')
            elif 'output' in ct and ct['output'] == 'All Liquid Tumors':
                tts.append('_LIQUID_')
            else:
                tts.append(ct['output'])

        return tts

    def _extract_data_from_match(self, match):
        """
        Extract Cancer Type, Gene, and Variant data from the given match tree

        :param match: Match criteria passed to MatchEngine.create_match_tree.
        """

        g = self.m.create_match_tree(match)
        pmt = ParseMatchTree(g)
        # NOTE(review): this replaces the cancer types found in earlier match trees,
        # while genes and variants accumulate - confirm this is intended.
        self.cancer_type_dict = pmt.extract_cancer_types()
        self.genes.extend(pmt.extract_genes())
        vdict_tmp = pmt.extract_variants()
        # items() exists on both Python 2 and Python 3; iteritems() is Python 2 only
        for k, v in self.vdict.items():
            v.extend(vdict_tmp[k])

    def add_autocomplete(self):
        """
        Walks the steps, arms and dose levels of the treatment list, parses the first
        match tree of each, and builds the autocomplete and search data of the trial.

        :return: Tuple of (suggestors, searchers, primary cancer types), where the last
                 element is the result of parse_primary_cancer_types.
        """

        for step in self.treatment_list['step']:
            if 'match' in step:
                self._extract_data_from_match(step['match'][0])

            if 'arm' in step:
                for arm in step['arm']:
                    if 'match' in arm:
                        self._extract_data_from_match(arm['match'][0])

                    if 'dose_level' in arm:
                        for dose in arm['dose_level']:
                            if 'match' in dose:
                                self._extract_data_from_match(dose['match'][0])

        # No match tree was found anywhere: fall back to an empty summary
        if self.cancer_type_dict is None:
            self.cancer_type_dict = {
                'diagnoses': [],
                'primary_cancer_types': [],
                'cancer_types_expanded': [],
                'excluded_cancer_types': []
            }

        weighted_cancer_types = []
        for ct in self.cancer_type_dict['primary_cancer_types']:
            suggestion = self._get_cancer_type_weight(ct, hierarchy='primary')
            weighted_cancer_types.append(suggestion)

        # Expanded types that are also primary types were already added above
        for ct in set(self.cancer_type_dict['cancer_types_expanded']) - set(self.cancer_type_dict['primary_cancer_types']):
            suggestion = self._get_cancer_type_weight(ct, hierarchy='default')
            weighted_cancer_types.append(suggestion)

        weighted_variants = {}
        for key in ['variants', 'cnvs', 'svs', 'wts']:
            weighted_variants[key] = []
            for v in set(self.vdict[key]):
                suggestion = self._get_variants_weight(v, esrule=key)
                weighted_variants[key].append(suggestion)

        suggestors = {
            "cancer_type_suggest": weighted_cancer_types,
            "hugo_symbol_suggest": {"input": list(set(self.genes))},
            # Entries ending in 'any' are searchable but not suggested
            "variant_suggest": [i for i in weighted_variants['variants'] if not i['input'].endswith('any')],
            "wildtype_suggest": weighted_variants['wts'],
            "cnv_suggest": weighted_variants['cnvs'],
            "sv_suggest": weighted_variants['svs'],
            "protocol_no_suggest": {'input': self.summary['protocol_number']},
            "disease_center_suggest": {
                'input': [i.replace('(', '').replace(')', '') for i in self.summary['disease_center'].split()],
                'output': self.summary['disease_center']
            },
            'disease_status_suggest': {'input': self.summary['disease_status']},
            'drug_suggest': {'input': [i.title() for i in self.summary['drugs']]},
            'investigator_suggest': self._get_investigator_suggest(self.summary['investigator'],
                                                                   self.summary['dfci_investigator']),
            'mmr_status_suggest': {'input': self.summary['mmr_status'] + self.summary['ms_status']},
            'nct_number_suggest': {'input': self.summary['nct_number']}
        }

        searchers = {
            "tumor_types": list(set(self._get_tumor_types_search(weighted_cancer_types))),
            "genes": list(set(self.genes)),
            "variants": list(set([i['input'] for i in weighted_variants['variants']])),
            "wildtype_genes": list(set([i['input'] for i in weighted_variants['wts']])),
            "cnv_genes": list(set([i['input'] for i in weighted_variants['cnvs']])),
            "sv_genes": list(set([i['input'] for i in weighted_variants['svs']])),
            "exclusion_genes": list(set(self.vdict['exclusions'])),
            "protocol_no": self.summary["protocol_number"],
            "drugs": self.summary["drugs"],
            "age": self.summary["age_summary"],
            "phase": self.summary["phase_summary"],
            "disease_status": self.summary["disease_status"],
            "nct_number": self.summary["nct_number"],
            "disease_center": self.summary["disease_center"],
            "mmr_status": self.summary["mmr_status"],
            "ms_status": self.summary["ms_status"],
            "mutational_signatures": self.summary["mutational_signatures"],
            "investigator": [i['output'] for i in suggestors['investigator_suggest']],
            "short_title": self.summary["short_title"]
        }

        return suggestors, searchers, parse_primary_cancer_types(self.cancer_type_dict['primary_cancer_types'])