Example #1
0
def write_res_doc2es_first_id(res_name, res_id_fields, res_doc):
    """Submit one resource document to Elasticsearch via the bulk helpers.

    Attaches per-resource metadata when available, fills the autocomplete
    fields, bumps the attempt counter for the resource, and then either
    indexes the document from scratch (when DELETE_AND_CREATE_INDEXES is
    set) or upserts it into the existing index.
    """
    global MIG_TRIED_COUNT

    # Deep-copy the shared metadata so later mutation of this document
    # cannot leak back into the shared structure.
    if res_name in resources_metadata:
        res_doc['_metadata'] = copy.deepcopy(resources_metadata[res_name])

    fill_autocomplete(res_name, res_doc)

    index_name = get_index_name(res_name)
    MIG_TRIED_COUNT[res_name] += 1

    resource = resources_description.RESOURCES_BY_RES_NAME[res_name]
    document_id = resource.get_doc_id(res_doc)

    if DELETE_AND_CREATE_INDEXES:
        es_util.index_doc_bulk(index_name, document_id, res_doc)
    else:
        es_util.update_doc_bulk(index_name, document_id, doc=res_doc,
                                upsert=True)
Example #2
0
    def save_denormalization_dict(
            cls,
            resource_desc: resources_description.ResourceDescription,
            dn_dict: dict,
            get_update_script_and_size,
            new_mappings=None,
            do_index=False):
        """Persist a denormalization dictionary into Elasticsearch.

        For every entry in dn_dict, builds an update (or full index) body
        via get_update_script_and_size and submits it through the bulk
        helpers, reporting progress along the way. Honors the global
        DenormalizationHandler.STOP flag by bailing out early.
        """
        if new_mappings:
            es_util.update_doc_type_mappings(resource_desc.idx_name,
                                             new_mappings)

        bar_name = '{0}-dn-{1}'.format(cls.RESOURCE.res_name,
                                       resource_desc.res_name)
        # Snapshot the keys so iteration is stable even if dn_dict changes.
        pending_ids = list(dn_dict.keys())
        progress = progress_bar_handler.get_new_progressbar(bar_name,
                                                            len(dn_dict))

        processed = 0
        for current_id in pending_ids:
            if DenormalizationHandler.STOP:
                return

            doc_body, doc_size = get_update_script_and_size(
                current_id, dn_dict[current_id])
            if do_index:
                # Caller asked for a full (re)index instead of an update.
                es_util.index_doc_bulk(resource_desc.idx_name, current_id,
                                       doc_body)
            else:
                es_util.update_doc_bulk(resource_desc.idx_name,
                                        current_id,
                                        doc=doc_body)

            processed += 1
            progress.update(processed)

        es_util.bulk_submitter.finish_current_queues()

        progress.finish()
Example #3
0
    def do_complete_data(self, doc: dict, total_docs: int, index: int,
                         first: bool, last: bool):
        """Complete one document's data and push the update to Elasticsearch.

        On the first call, sets up the progress bar and applies any custom
        mappings required by the completion step. On the last call, flushes
        the bulk queues and finishes the progress bar.
        """
        if first:
            bar_name = '{0}-data-completion'.format(self.RESOURCE.idx_name)
            self.complete_data_pb = progress_bar_handler.get_new_progressbar(
                bar_name, total_docs)
            custom_mappings = self.get_custom_mappings_for_complete_data()
            if len(custom_mappings.keys()) > 0:
                self.update_mappings(custom_mappings)

        completion_doc = self.get_doc_for_complete_data(doc)
        if completion_doc is not None:
            es_util.update_doc_bulk(self.RESOURCE.idx_name,
                                    self.RESOURCE.get_doc_id(doc),
                                    doc=completion_doc)

        es_util.bulk_submitter.set_complete_futures(True)

        if last:
            es_util.bulk_submitter.finish_current_queues()
            es_util.bulk_submitter.set_complete_futures(False)
            self.complete_data_pb.finish()
        else:
            self.complete_data_pb.update(index)
Example #4
0
# Read the distinct molecule ChEMBL ids from the input CSV (first column).
with open(molecules_file, 'r') as input_file:
    for csv_row in csv.reader(input_file):
        molecule_chembl_ids.add(csv_row[0].strip())

print("READ {} DISTINCT MOLECULES".format(len(molecule_chembl_ids)))

# Mapping addition so the new boolean flag is indexed correctly.
NEW_MAPPINGS = {
    'properties': {
        '_metadata': {
            'properties': {
                'is_covid19_ds': DefaultMappings.BOOLEAN
            }
        }
    }
}

es_util.update_mappings_idx(resources_description.MOLECULE.idx_name, NEW_MAPPINGS)

# Flag every listed molecule as part of the COVID-19 dataset via bulk updates.
es_util.bulk_submitter.start()
for molecule_id in molecule_chembl_ids:
    flag_doc = {
        '_metadata': {
            'is_covid19_ds': True
        }
    }
    es_util.update_doc_bulk(resources_description.MOLECULE.idx_name,
                            molecule_id, doc=flag_doc)

es_util.bulk_submitter.join()