def get_custom_mappings_for_complete_data(self):
    """Collect every ES mapping fragment needed by the complete activity data.

    Merges the activity/metadata mapping fragments exposed by the related
    denormalization handlers plus one locally-defined fragment for the
    generated short data-validity comment.
    """
    generated_fragment = {
        'properties': {
            '_metadata': {
                'properties': {
                    'activity_generated': {
                        'properties': {
                            'short_data_validity_comment': DefaultMappings.KEYWORD
                        }
                    }
                }
            }
        }
    }
    combined = SummableDict()
    # Merge order mirrors the original handler precedence.
    for fragment in (
            AssayDenormalizationHandler.ACTIVITY_DATA_MAPPING,
            CompoundDenormalizationHandler.ACTIVITY_DATA_MAPPING,
            SourceDenormalizationHandler.METADATA_MAPPING,
            OrganismDenormalizationHandler.METADATA_MAPPING,
            CompoundRecordDenormalizationHandler.ACTIVITY_DATA_MAPPING,
            TargetDenormalizationHandler.ACTIVITY_DATA_MAPPING,
            ProteinClassDenormalizationHandler.METADATA_MAPPING,
            DocumentDenormalizationHandler.FIELDS_FOR_ACTIVITY_MAPPING,
            generated_fragment,
    ):
        combined += fragment
    return combined
def get_custom_mappings_for_complete_data(self):
    """Build the custom ES mappings for the complete assay document.

    Combines source and organism metadata mappings with the generated assay
    label fields, and optionally the document-handler mappings when present.
    """
    assay_generated_props = {
        'confidence_label': DefaultMappings.KEYWORD,
        'relationship_label': DefaultMappings.KEYWORD,
        'type_label': DefaultMappings.KEYWORD
    }
    combined = SummableDict()
    combined += SourceDenormalizationHandler.METADATA_MAPPING
    combined += OrganismDenormalizationHandler.METADATA_MAPPING
    combined += {
        'properties': {
            '_metadata': {
                'properties': {
                    'assay_generated': {
                        'properties': assay_generated_props
                    }
                }
            }
        }
    }
    # Document handler mappings are optional; merge them only when available.
    if self.document_dh_mappings:
        combined += self.document_dh_mappings
    return combined
def scan_index(self, es_index, on_doc=None, query=None):
    """Iterate over every document of an ES index, calling ``on_doc`` per hit.

    :param es_index: name of the index to scan.
    :param on_doc: optional callable ``(source, doc_id, total, count, is_first,
        is_last)``; returning a truthy value stops the scan early.
    :param query: optional ES query body; mutated to force exact hit totals.

    Exits the process if no ES connection is configured.
    """
    if self.es_conn is None:
        print(
            "FATAL ERROR: there is not an elastic search connection defined.",
            file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        sys.exit(1)
    if query is None:
        query = {}
    # Force ES to report the exact total (not the default 10k cap).
    query['track_total_hits'] = True
    search_res = self.es_conn.search(index=es_index, body=query)
    total_docs = search_res['hits']['total']['value']
    # Refresh the progress bar roughly every 0.1% of docs, capped at 1000
    # updates apart and never below 1 (the original expression could yield 0
    # and crash the modulo below for an empty index).
    update_every = max(1, min(math.ceil(total_docs * 0.001), 1000))
    # NOTE: the original built an unused `scan_query` SummableDict here; it
    # was dead code and has been removed.
    scanner = helpers.scan(self.es_conn,
                           index=es_index,
                           scroll='10m',
                           query=query,
                           size=1000)
    count = 0
    p_bar = progress_bar_handler.get_new_progressbar(
        '{0}_es-index-scan'.format(es_index), total_docs)
    for doc_n in scanner:
        if callable(on_doc):
            should_stop = on_doc(doc_n['_source'], doc_n['_id'], total_docs,
                                 count, count == 0, count == total_docs - 1)
            if should_stop or self.stop_scan:
                return
        count += 1
        if count % update_every == 0:
            p_bar.update(count)
    p_bar.finish()
def get_custom_mappings_for_complete_data(self):
    """Expose the mapping for the denormalized list of molecule ChEMBL IDs."""
    metadata_props = {
        'all_molecule_chembl_ids': DefaultMappings.CHEMBL_ID_REF
    }
    custom = SummableDict()
    custom += {
        'properties': {
            '_metadata': {
                'properties': metadata_props
            }
        }
    }
    return custom
def get_all_dn_dicts(self):
    """Merge every child node's denormalization dict into one, keyed by ChEMBL ID."""
    merged = SummableDict()
    p_bar = get_new_progressbar('built-dn-hierarchy-dict', len(self.children))
    # Only the dn dict per child matters here; family/node data are discarded.
    for done, node in enumerate(self.children.values(), start=1):
        child_dn_dict, _shared_family, _node_data = node.get_denormalization_dict()
        for chembl_id, dn_data in child_dn_dict.items():
            merged[chembl_id] = dn_data
        p_bar.update(done)
    p_bar.finish()
    return merged
def save_similarity_data(self, chembl_id_1, chembl_id_2, mol_tani, tid_tani):
    """Record one document-pair similarity entry (molecule and target Tanimoto)."""
    per_doc = self.document_dict.setdefault(chembl_id_1, {})
    pair_entries = per_doc.setdefault(chembl_id_2, [])
    entry = SummableDict()
    entry += {
        'document_chembl_id': chembl_id_2,
        'mol_tani': mol_tani,
        'tid_tani': tid_tani
    }
    # Enrich the entry with the denormalized document data gathered for assays.
    entry += self.document_dh.docs_for_assay_by_chembl_id[chembl_id_2]
    pair_entries.append(entry)
def get_custom_mappings_for_complete_data(self):
    """Mapping for the denormalized molecule references and browser flag."""
    metadata_props = {
        'all_molecule_chembl_ids': DefaultMappings.CHEMBL_ID_REF,
        'parent_molecule_chembl_id': DefaultMappings.CHEMBL_ID_REF,
        'should_appear_in_browser': DefaultMappings.BOOLEAN
    }
    custom = SummableDict()
    custom += {
        'properties': {
            '_metadata': {
                'properties': metadata_props
            }
        }
    }
    return custom
def save_denormalization(self):
    """Persist the document-similarity denormalization dict.

    Warns (to stderr) about any document pair with more than one similarity
    entry, then saves the dict with the similar-documents mappings attached.
    """
    for doc_id, similar_map in self.document_dict.items():
        for other_id, entries in similar_map.items():
            if len(entries) != 1:
                print('WARNING FOUND DUPLICATE DATA FOR:',
                      doc_id,
                      other_id,
                      entries,
                      file=sys.stderr)

    def get_update_script_and_size(es_doc_id, es_doc):
        # Keep only the first similarity entry per related document.
        similar_docs = [entries[0] for entries in es_doc.values()]
        update_doc = {'_metadata': {'similar_documents': similar_docs}}
        return update_doc, len(similar_docs) * 10

    new_mappings = SummableDict()
    new_mappings += {
        'properties': {
            '_metadata': {
                'properties': {
                    'similar_documents': {
                        'properties': {
                            'document_chembl_id': DefaultMappings.CHEMBL_ID_REF,
                            'mol_tani': DefaultMappings.FLOAT,
                            'tid_tani': DefaultMappings.FLOAT
                        }
                    }
                }
            }
        }
    }
    new_mappings += DocumentDenormalizationHandler.FIELDS_FOR_DOC_SIM_MAPPING
    self.save_denormalization_dict(
        DenormalizationHandler.AVAILABLE_RESOURCES.DOCUMENT,
        self.document_dict,
        get_update_script_and_size,
        new_mappings=new_mappings)
def _recursive_simplify_es_properties(cur_dict: dict, cur_prefix: str):
    """Flatten a nested ES mapping into dotted-path keys with simplified values.

    Leaf mappings (marked with 'es_mapping_leaf') are simplified; intermediate
    dicts are recorded as non-aggregatable objects and recursed into.
    Falsy non-dict values are dropped.
    """
    flattened = SummableDict()
    for key, value in cur_dict.items():
        path = '{0}.{1}'.format(cur_prefix, key) if cur_prefix else key
        if not isinstance(value, dict):
            if value:
                flattened[path] = value
            continue
        if 'es_mapping_leaf' in value:
            flattened[path] = simplify_single_mapping(value)
        else:
            flattened[path] = {
                'type': 'object',
                'aggregatable': False,
                'sortable': False
            }
            flattened += _recursive_simplify_es_properties(value, path)
    return flattened
def get_custom_mappings_for_complete_data(self):
    """Custom compound mappings: UniChem, ATC classes and generated labels."""
    compound_generated_props = {
        'availability_type_label': DefaultMappings.KEYWORD,
        'chirality_label': DefaultMappings.KEYWORD,
        'image_file': DefaultMappings.KEYWORD
    }
    combined = SummableDict()
    combined += unichem_helper.UNICHEM_MAPPING
    combined += ATCClassDenormalizationHandler.METADATA_MAPPING
    combined += {
        'properties': {
            '_metadata': {
                'properties': {
                    'compound_generated': {
                        'properties': compound_generated_props
                    }
                }
            }
        }
    }
    return combined
def save_denormalization(self):
    """Write the target denormalization data, attaching protein classifications."""
    combined_mappings = SummableDict()
    combined_mappings += self.METADATA_MAPPING
    combined_mappings += ProteinClassDenormalizationHandler.METADATA_MAPPING

    def get_update_script_and_size(doc_id, doc):
        # Update size heuristic: 20 units per target component entry.
        protein_classifications = self.get_protein_classifications(doc_id)
        update_doc = {
            '_metadata': {
                'target_component': doc,
                'protein_classification': protein_classifications
            }
        }
        return update_doc, len(doc) * 20

    self.save_denormalization_dict(
        DenormalizationHandler.AVAILABLE_RESOURCES.TARGET,
        self.target_dict,
        get_update_script_and_size,
        combined_mappings)
def get_new_index_mappings():
    """Compose the ES mapping for the drug-indication-by-parent-molecule index."""
    efo_fragment = {
        'efo': {
            'properties': {
                'term': DefaultMappings.LOWER_CASE_KEYWORD + DefaultMappings.TEXT_STD,
                'id': DefaultMappings.ID
            }
        }
    }
    drug_ind_props = SummableDict(**DRUG_INDICATION.get_resource_mapping_from_es())
    # Replace the flat efo_term/efo_id pair with a nested 'efo' object.
    drug_ind_props = drug_ind_props - ['efo_term', 'efo_id'] + efo_fragment
    return {
        'properties': {
            'parent_molecule': {
                'properties': MOLECULE.get_resource_mapping_from_es()
            },
            'drug_indication': {
                'properties': drug_ind_props
            }
        }
    }
def get_custom_mappings_for_complete_data(self):
    """Mapping for the drug/metabolite/substrate image files and names."""
    compound_data_props = {
        'drug_image_file': DefaultMappings.KEYWORD,
        'metabolite_image_file': DefaultMappings.KEYWORD,
        'substrate_image_file': DefaultMappings.KEYWORD,
        'drug_pref_name': DefaultMappings.KEYWORD,
        'metabolite_pref_name': DefaultMappings.KEYWORD,
        'substrate_pref_name': DefaultMappings.KEYWORD
    }
    custom = SummableDict()
    custom += {
        'properties': {
            '_metadata': {
                'properties': {
                    'compound_data': {
                        'properties': compound_data_props
                    }
                }
            }
        }
    }
    return custom
def get_config_for_prop(index_name, prop_id):
    """Resolve the configuration of one index property.

    Merges the simplified ES mapping description with the YAML override file.
    A property may be: a normal ES property, an overridden ES property, or a
    virtual property defined only in the override file (either based on an
    existing ES property or fully contextual).

    :param index_name: ES index the property belongs to.
    :param prop_id: dotted property identifier inside the index.
    :raises ESPropsConfigurationManagerError: unknown index, unknown property,
        virtual property based on a missing property, or an incomplete
        contextual property definition.
    :return: the merged configuration (cached for CACHE_TIME).
    """
    cache_key = 'property_config-{index_name}-{prop_id}'.format(
        index_name=index_name, prop_id=prop_id)
    cache_response = cache.get(cache_key)
    if cache_response is not None:
        return cache_response
    index_mapping = resources_description.RESOURCES_BY_IDX_NAME.get(index_name)
    if index_mapping is None:
        raise ESPropsConfigurationManagerError(
            "The index {} does not exist!".format(index_name))
    simplified_mapping = index_mapping.get_simplified_mapping_from_es()
    es_property_description = simplified_mapping.get(prop_id)
    found_in_es = es_property_description is not None
    if not found_in_es:
        es_property_description = {}
    # Search for description in override. Use a context manager: the original
    # passed an open() result straight to yaml.load and leaked the handle.
    with open(settings.PROPERTIES_CONFIG_OVERRIDE_FILE, 'r') as override_file:
        config_override = yaml.load(override_file, Loader=yaml.FullLoader)
    found_in_override = False
    property_override_description = None  # ensure it is always bound
    if config_override is not None:
        index_override = config_override.get(index_name)
        if index_override is not None:
            property_override_description = index_override.get(prop_id)
            found_in_override = property_override_description is not None
    config = {}
    if not found_in_es and not found_in_override:
        raise ESPropsConfigurationManagerError(
            "The property {} does not exist in elasticsearch or as virtual property"
            .format(prop_id))
    elif found_in_es and not found_in_override:
        # this is a normal property WITHOUT override
        config = SummableDict({
            'index_name': index_name,
            'prop_id': prop_id,
        })
        config += SummableDict(es_property_description)
    elif not found_in_es and found_in_override:
        # this is a virtual property
        config = SummableDict({
            'index_name': index_name,
            'prop_id': prop_id,
            'is_virtual': True
        })
        based_on = property_override_description.get('based_on')
        if based_on is not None:
            # virtual property that inherits its description from another one
            config['is_contextual'] = False
            base_description = simplified_mapping.get(based_on)
            if base_description is None:
                raise ESPropsConfigurationManagerError(
                    'The virtual property {prop_id} is based on {based_on} which does not exist in elasticsearch '
                    'index {index_name}'.format(prop_id=prop_id,
                                                based_on=based_on,
                                                index_name=index_name))
            config += SummableDict(base_description)
        else:
            # contextual virtual property: must declare its own type/flags
            config['is_contextual'] = True
            if property_override_description.get('aggregatable') is None or \
                    property_override_description.get('type') is None or \
                    property_override_description.get('sortable') is None:
                raise ESPropsConfigurationManagerError(
                    'A contextual property must define the type and if it is '
                    'aggregatable and sortable. index => {} : prop => {}'.
                    format(index_name, prop_id))
            config += property_override_description
    elif found_in_es and found_in_override:
        # this is a normal overridden property
        config = SummableDict({
            'index_name': index_name,
            'prop_id': prop_id,
        })
        config += SummableDict(es_property_description)
        config += property_override_description
    cache.set(cache_key, config, CACHE_TIME)
    return config
def get_custom_mappings_for_complete_data(self):
    """This resource needs no extra mappings; return an empty summable dict."""
    return SummableDict()
def get_custom_mappings_for_complete_data(self):
    """Combine the dynamic related-entities mapping with organism metadata."""
    combined = SummableDict()
    for fragment in (self.RELATED_ENTITIES_DYNAMIC_MAPPING,
                     OrganismDenormalizationHandler.METADATA_MAPPING):
        combined += fragment
    return combined
class DefaultMappings(object):
    """Reusable Elasticsearch analysis settings and property-mapping fragments.

    Exposes:
    - COMMON_* analysis components (char filters, token filters, analyzers)
      assembled into COMMON_ANALYSIS for index creation.
    - Property mapping constants (KEYWORD, ID, CHEMBL_ID_REF, TEXT_STD, ...)
      built by summing private SummableDict fragments.
    - Type-set helpers (NUMERIC_TYPES, AGGREGATABLE_TYPES, SIMPLE_MAPPINGS)
      used to classify mapped properties.
    """

    COMMON_CHAR_FILTERS = SummableDict(
        alphanumeric_and_space_char_filter={
            'type': 'pattern_replace',
            # Raw string: '\s' is an invalid escape in a plain string literal
            # (SyntaxWarning on modern Python); the pattern is unchanged.
            'pattern': r'[^a-zA-Z0-9\s]',
            'replacement': ''
        },
        vitamin_char_filter={
            'type': 'pattern_replace',
            'pattern': r'(?:\b(?:vitamin|vit))\s+([a-z])(?:\s*(\d+))?',
            'flags': 'CASE_INSENSITIVE',
            'replacement': 'vitamin_$1$2'
        })

    COMMON_FILTERS = SummableDict(
        greek_synonym_filter={
            'type': 'synonym',
            'synonyms_path': 'synonyms/greek_letters_synonyms.txt'
        },
        english_stop={
            'type': 'stop',
            'stopwords': '_english_'
        },
        english_keywords={
            'type': 'keyword_marker',
            'keywords': ['example']
        },
        english_stemmer={
            'type': 'stemmer',
            'language': 'english'
        },
        english_possessive_stemmer={
            'type': 'stemmer',
            'language': 'possessive_english'
        },
        large_id_ref_filter={
            'type': 'limit',
            'max_token_count': 10**6
        })

    COMMON_ANALYZERS = SummableDict(
        greek_syn_std_analyzer={
            'type': 'custom',
            'tokenizer': 'standard',
            'filter': ['greek_synonym_filter', 'lowercase'],
            'char_filter': 'vitamin_char_filter'
        },
        greek_syn_eng_analyzer={
            'type': 'custom',
            'tokenizer': 'standard',
            'filter': [
                'greek_synonym_filter',
                # English analyzer based on:
                # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-lang-analyzer.html#english-analyzer
                'english_possessive_stemmer',
                'lowercase',
                'english_stop',
                'english_keywords',
                'english_stemmer'
            ],
            'char_filter': 'vitamin_char_filter'
        },
        lowercase_keyword={
            'type': 'custom',
            'tokenizer': 'keyword',
            'filter': 'lowercase'
        },
        alphanumeric_lowercase_keyword={
            'type': 'custom',
            'tokenizer': 'keyword',
            'filter': 'lowercase',
            'char_filter': 'alphanumeric_and_space_char_filter'
        },
        whitespace_alphanumeric_lowercase_std_analyzer={
            'type': 'custom',
            'tokenizer': 'whitespace',
            'filter': ['lowercase'],
            'char_filter':
            ['vitamin_char_filter', 'alphanumeric_and_space_char_filter']
        },
        whitespace_alphanumeric_std_no_limit_analyzer={
            'type': 'custom',
            'tokenizer': 'whitespace',
            'filter': ['large_id_ref_filter'],
            'char_filter': ['alphanumeric_and_space_char_filter']
        })

    COMMON_ANALYSIS = SummableDict(char_filter=COMMON_CHAR_FILTERS,
                                   analyzer=COMMON_ANALYZERS,
                                   filter=COMMON_FILTERS)

    # Indexing mappings
    __NO_INDEX = SummableDict(index=False)
    __DO_INDEX = SummableDict(index=True)
    __IGNORE_ABOVE = SummableDict(ignore_above=250)

    # Basic Types
    __TEXT_TYPE_NO_OFFSETS = SummableDict(type='text')
    __TEXT_TYPE = SummableDict(type='text',
                               term_vector='with_positions_offsets')
    __KEYWORD_TYPE = SummableDict(type='keyword')
    COMPLETION_TYPE = SummableDict(type='completion',
                                   analyzer='greek_syn_std_analyzer')
    BYTE = SummableDict(type='byte')
    SHORT = SummableDict(type='short')
    INTEGER = SummableDict(type='integer')
    LONG = SummableDict(type='long')
    FLOAT = SummableDict(type='float')
    DOUBLE = SummableDict(type='double')
    BOOLEAN = SummableDict(type='boolean')
    DATE_Y_M_D = SummableDict(type='date', format='yyyy-MM-dd')

    # Text Field Types (sub-field fragments summed into the public constants)
    __KEYWORD_FIELD = SummableDict(
        fields={'keyword': __KEYWORD_TYPE + __DO_INDEX})
    __ALPHANUMERIC_LOWERCASE_KEYWORD = SummableDict(
        fields={
            'alphanumeric_lowercase_keyword': __TEXT_TYPE + __DO_INDEX + {
                'analyzer': 'alphanumeric_lowercase_keyword'
            }
        })
    __LOWER_CASE_KEYWORD_FIELD = SummableDict(
        fields={
            'lower_case_keyword': __TEXT_TYPE + __DO_INDEX + {
                'analyzer': 'lowercase_keyword'
            }
        })
    __ID_FIELD = SummableDict(
        fields={'entity_id': __KEYWORD_FIELD['fields']['keyword']})
    __ID_REF_FIELD = SummableDict(
        fields={'id_reference': __KEYWORD_FIELD['fields']['keyword']})
    __CHEMBL_ID_FIELD = SummableDict(
        fields={'chembl_id': __KEYWORD_FIELD['fields']['keyword']})
    __CHEMBL_ID_REF_FIELD = SummableDict(
        fields={
            'chembl_id_reference': __CHEMBL_ID_FIELD['fields']['chembl_id']
        })
    __STD_ANALYZED_FIELD = SummableDict(
        fields={
            'std_analyzed': __TEXT_TYPE + __DO_INDEX + {
                'analyzer': 'greek_syn_std_analyzer'
            }
        })
    __ENG_ANALYZED_FIELD = SummableDict(
        fields={
            'eng_analyzed': __TEXT_TYPE + __DO_INDEX + {
                'analyzer': 'greek_syn_eng_analyzer'
            }
        })
    __WS_ANALYZED_FIELD = SummableDict(
        fields={
            'ws_analyzed': __TEXT_TYPE + __DO_INDEX + {
                'analyzer': 'whitespace_alphanumeric_lowercase_std_analyzer'
            }
        })
    __ALT_NAME_ANALYZED_FIELD = SummableDict(
        fields={
            'alt_name_analyzed': __TEXT_TYPE + __DO_INDEX + {
                'analyzer': 'greek_syn_eng_analyzer'
            }
        })
    __PREF_NAME_ANALYZED_FIELD = SummableDict(
        fields={
            'pref_name_analyzed':
            __ALT_NAME_ANALYZED_FIELD['fields']['alt_name_analyzed']
        })
    __TITLE_ANALYZED_FIELD = SummableDict(fields={
        'title_analyzed':
        __ALT_NAME_ANALYZED_FIELD['fields']['alt_name_analyzed']
    })

    # Chemical structure field types (custom fingerprint plugin types)
    __SUBSTRUCTURE_FIELD = SummableDict(
        fields={
            "substructure": {
                "type": "structure_fingerprint",
                "aromaticity_mode": "preserve"
            }
        })
    __SIMILARITY_FIELD = SummableDict(
        fields={
            "similarity": {
                "type": "similarity_fingerprint",
                "aromaticity_mode": "preserve"
            }
        })

    # Properties MAPPINGS
    NO_INDEX_KEYWORD = __NO_INDEX + __KEYWORD_TYPE
    NO_INDEX_TEXT_NO_OFFSETS = __NO_INDEX + __TEXT_TYPE_NO_OFFSETS

    # KEYWORD FIELDS indexation for the field itself (Aggregatable fields)
    KEYWORD = __DO_INDEX + __KEYWORD_TYPE + __KEYWORD_FIELD + __ALPHANUMERIC_LOWERCASE_KEYWORD
    LOWER_CASE_KEYWORD = __DO_INDEX + __KEYWORD_TYPE + __LOWER_CASE_KEYWORD_FIELD + __ALPHANUMERIC_LOWERCASE_KEYWORD
    ID = __DO_INDEX + __KEYWORD_TYPE + __ID_FIELD + __ALPHANUMERIC_LOWERCASE_KEYWORD
    ID_REF = __DO_INDEX + __KEYWORD_TYPE + __ID_REF_FIELD + __ALPHANUMERIC_LOWERCASE_KEYWORD
    CHEMBL_ID = __DO_INDEX + __KEYWORD_TYPE + __CHEMBL_ID_FIELD + __ALPHANUMERIC_LOWERCASE_KEYWORD
    CHEMBL_ID_REF = __DO_INDEX + __KEYWORD_TYPE + __CHEMBL_ID_REF_FIELD + __ALPHANUMERIC_LOWERCASE_KEYWORD
    CHEMBL_ID_REF_AS_WS = __DO_INDEX + __TEXT_TYPE_NO_OFFSETS + \
        {
            'analyzer': 'whitespace_alphanumeric_std_no_limit_analyzer'
        }

    # CHEMICAL FIELDS
    CHEM_STRUCT_FIELD = __DO_INDEX + __KEYWORD_TYPE + __IGNORE_ABOVE + __SUBSTRUCTURE_FIELD + __SIMILARITY_FIELD

    # TEXT FIELDS indexation for the field itself (Aggregatable)
    TEXT_STD = __DO_INDEX + __KEYWORD_TYPE + __IGNORE_ABOVE + __STD_ANALYZED_FIELD + __ENG_ANALYZED_FIELD + \
        __WS_ANALYZED_FIELD
    PREF_NAME = __DO_INDEX + __KEYWORD_TYPE + __IGNORE_ABOVE + __STD_ANALYZED_FIELD + __ENG_ANALYZED_FIELD + \
        __WS_ANALYZED_FIELD + __PREF_NAME_ANALYZED_FIELD
    ALT_NAME = __DO_INDEX + __KEYWORD_TYPE + __IGNORE_ABOVE + __STD_ANALYZED_FIELD + __ENG_ANALYZED_FIELD + \
        __WS_ANALYZED_FIELD + __ALT_NAME_ANALYZED_FIELD
    TITLE = __DO_INDEX + __KEYWORD_TYPE + __IGNORE_ABOVE + __STD_ANALYZED_FIELD + __ENG_ANALYZED_FIELD + \
        __WS_ANALYZED_FIELD + __TITLE_ANALYZED_FIELD

    NUMERIC_MAPPINGS = [BYTE, SHORT, INTEGER, LONG, FLOAT, DOUBLE]
    INTEGER_NUMERIC_MAPPINGS = [BYTE, SHORT, INTEGER, LONG]
    AGGREGATABLE_MAPPINGS = [
        BOOLEAN,
        # Numeric Types
        BYTE,
        SHORT,
        INTEGER,
        LONG,
        FLOAT,
        DOUBLE,
        # Text Types
        KEYWORD,
        LOWER_CASE_KEYWORD,
        ID_REF,
        CHEMBL_ID_REF,
        ID,
        CHEMBL_ID,
        # Chemical types
        CHEM_STRUCT_FIELD
    ]

    NUMERIC_TYPES = {desc_i['type'] for desc_i in NUMERIC_MAPPINGS}
    INTEGER_NUMERIC_TYPES = {
        desc_i['type']
        for desc_i in INTEGER_NUMERIC_MAPPINGS
    }
    AGGREGATABLE_TYPES = {desc_i['type'] for desc_i in AGGREGATABLE_MAPPINGS}

    SIMPLE_MAPPINGS = {
        'string': {__KEYWORD_TYPE['type'], __TEXT_TYPE['type']},
        'string-es-interal': {
            COMPLETION_TYPE['type'],
        },
        'double': {FLOAT['type'], DOUBLE['type']},
        'integer': INTEGER_NUMERIC_TYPES,
        'boolean': {BOOLEAN['type']},
        'date': {
            DATE_Y_M_D['type'],
        }
    }
    SIMPLE_MAPPINGS_REVERSE = {}
    # Loop variables renamed from the original (which shadowed builtin `type`);
    # they leak into the class namespace but nothing should rely on them.
    for simple_type, es_types in SIMPLE_MAPPINGS.items():
        for es_type in es_types:
            SIMPLE_MAPPINGS_REVERSE[es_type] = simple_type

    # Enable/Disable
    ENABLE = SummableDict(enabled=True)
    DISABLE = SummableDict(enabled=False)
def get_custom_mappings_for_complete_data(self):
    """Only the organism metadata mapping is required for this resource."""
    combined = SummableDict()
    combined += OrganismDenormalizationHandler.METADATA_MAPPING
    return combined
def save_denormalization_for_new_index(self):
    """Rebuild the drug-indication-by-parent index from grouped drug indications.

    Drops and recreates the target index, then merges each group of drug
    indications (same parent molecule + MeSH id) into one document with a
    nested 'efo' list, the maximum phase and the union of indication refs.
    """
    es_util.delete_idx(self.generated_resource.idx_name)
    es_util.create_idx(self.generated_resource.idx_name,
                       3,
                       1,
                       analysis=DefaultMappings.COMMON_ANALYSIS,
                       mappings=DrugIndicationDenormalizationHandler.
                       get_new_index_mappings())
    dn_dict = {}
    print('{0} GROUPED RECORDS WERE FOUND'.format(
        len(self.drug_inds_by_grouping_id)),
          file=sys.stderr)
    p_bar = progress_bar_handler.get_new_progressbar(
        'drug_inds_by_parent-dn-generation', len(self.drug_inds_by_grouping_id))
    for done, group_drug_inds in enumerate(
            self.drug_inds_by_grouping_id.values(), start=1):
        base_drug_ind = group_drug_inds[0]
        efo_data = {}
        indication_refs = []
        max_phase_for_ind = 0
        # Aggregate across every indication record in the group.
        for drug_ind in group_drug_inds:
            max_phase_for_ind = max(max_phase_for_ind,
                                    drug_ind.get('max_phase_for_ind', 0))
            efo_id = drug_ind.get('efo_id', None)
            if efo_id is not None:
                efo_data[efo_id] = drug_ind.get('efo_term', None)
            indication_refs += drug_ind.get('indication_refs', [])
        parent_chembl_id, mesh_id = self.get_drug_ind_grouping_id_parts(
            base_drug_ind)
        drug_ind_data = SummableDict(**DRUG_INDICATION.get_doc_by_id_from_es(
            base_drug_ind['drugind_id']))
        # Flat efo_term/efo_id are replaced by the nested 'efo' list.
        drug_ind_data -= ['efo_term', 'efo_id']
        drug_ind_data['efo'] = [{
            'id': efo_id,
            'term': term
        } for efo_id, term in efo_data.items()]
        drug_ind_data['max_phase_for_ind'] = max_phase_for_ind
        drug_ind_data['indication_refs'] = indication_refs
        new_doc = {
            'parent_molecule':
            MOLECULE.get_doc_by_id_from_es(parent_chembl_id),
            'drug_indication': drug_ind_data
        }
        dn_dict[self.generated_resource.get_doc_id(new_doc)] = new_doc
        p_bar.update(done)
    p_bar.finish()
    self.save_denormalization_dict(
        self.generated_resource,
        dn_dict,
        DenormalizationHandler.default_update_script_and_size,
        do_index=True)
def get_denormalization_dict(self):
    """Build the hierarchy denormalization dict for this compound node's subtree.

    Recursively aggregates drug-source flags and max_phase over all children,
    records each child's parent link, and — when this node is the family
    parent — attaches the shared family data to every entry.

    Returns a 3-tuple:
        dn_dict: ChEMBL ID -> denormalized hierarchy data for the subtree.
        shared_family_data: per-compound structure identifiers for the family.
        node_data: this node's own summary data (from get_node_data()).
    """
    dn_dict = SummableDict()
    is_drug_src = self.compound_data['is_drug_src']
    is_usan_src = self.compound_data['is_usan_src']
    is_db_drug = self.compound_data['is_db_drug']
    max_phase = self.compound_data['max_phase']
    # This node's structure identifiers; children's are appended below.
    shared_family_data = [{
        'chembl_id': self.chembl_id,
        'inchi': self.compound_data['inchi'],
        'inchi_connectivity_layer':
        get_inchi_connectivity_layer(self.compound_data['inchi_key']),
        'inchi_key': self.compound_data['inchi_key']
    }]
    node_data = self.get_node_data()
    children_data = []
    for chembl_id_i, node in self.children.items():
        # Flags and max_phase are aggregated over the whole family (OR / max).
        is_drug_src |= node.compound_data['is_drug_src']
        is_usan_src |= node.compound_data['is_usan_src']
        max_phase = max(max_phase, node.compound_data['max_phase'])
        dn_dict_i, sf_data_i, nd_i = node.get_denormalization_dict()
        children_data.append(nd_i)
        # Record this node as the hierarchy parent of each child's entry.
        put_js_path_in_dict(dn_data_i,
                            node.chembl_id + '._metadata.hierarchy.parent',
                            node_data)
        dn_dict += dn_data_i
        shared_family_data += sf_data_i
    # Warning checks! Consistency warnings only; processing continues.
    if is_db_drug and is_db_drug != (
            (is_usan_src or is_drug_src) and self.is_family_parent()):
        print('WARNING! {0} has db_drug {1} and sources_drug {2}'.format(
            self.chembl_id, is_db_drug, (is_usan_src or is_drug_src)),
              file=sys.stderr)
    if max_phase != self.compound_data['max_phase']:
        print(
            'WARNING! {0} has db_max_phase of {1} and children max_phase of {2}'
            .format(self.chembl_id, self.compound_data['max_phase'],
                    max_phase),
            file=sys.stderr)
    dn_dict[self.chembl_id] = {}
    put_js_path_in_dict(
        dn_dict[self.chembl_id], '_metadata.hierarchy', {
            'is_approved_drug': (is_drug_src and max_phase == 4),
            'is_usan': is_usan_src,
            'children': children_data
        })
    # If root collect the shared family data
    if self.is_family_parent():
        family_inchi_connectivity_layer = get_inchi_connectivity_layer(
            self.compound_data['inchi_key'])
        for dn_data in dn_dict.values():
            put_js_path_in_dict(dn_data, '_metadata.hierarchy.all_family',
                                shared_family_data)
            put_js_path_in_dict(
                dn_data,
                '_metadata.hierarchy.family_inchi_connectivity_layer',
                family_inchi_connectivity_layer)
    return dn_dict, shared_family_data, node_data