def setup():
    if not Index(Schema.Index.name).exists():
        Schema.init()
    if not Index(SchemaClass.Index.name).exists():
        SchemaClass.init()
    if not Index(Dataset.Index.name).exists():
        Dataset.init()
def update_n3c_routine():
    from discovery.data.dataset import Dataset
    datasets = Dataset.search().query("exists", field="_n3c.url")
    datasets = datasets.source(False).scan()  # only doc ids are needed, skip _source
    for dataset in datasets:
        update_n3c_status(dataset.meta.id)
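# A minimal sketch of running update_n3c_routine on a schedule. Using
# tornado.ioloop.PeriodicCallback here is an assumption -- the project may
# wire this up differently (cron, a task queue, etc.) -- and
# schedule_n3c_updates is a hypothetical helper, not part of the codebase.
from tornado.ioloop import IOLoop, PeriodicCallback


def schedule_n3c_updates(interval_ms=6 * 60 * 60 * 1000):
    # Re-check every tracked N3C ticket every 6 hours (assumed interval).
    PeriodicCallback(update_n3c_routine, interval_ms).start()
    IOLoop.current().start()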
def reset():
    index_1 = Index(Schema.Index.name)
    index_2 = Index(SchemaClass.Index.name)
    index_3 = Index(Dataset.Index.name)
    if index_1.exists():
        index_1.delete()
    if index_2.exists():
        index_2.delete()
    if index_3.exists():
        index_3.delete()
    Schema.init()
    SchemaClass.init()
    Dataset.init()
def update_n3c_status(_id):
    import requests
    try:
        # re-read the ticket status from the tracker and record when we checked
        dataset = ESDataset.get(_id)
        dataset.update(_n3c={
            "url": dataset._n3c.url,
            "status": requests.get(dataset._n3c.url).json()["fields"]["status"]["name"],
            "timestamp": datetime.now(timezone.utc)
        })
    except Exception as exc:
        logging.warning(str(exc))
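# For reference, the shape update_n3c_status expects from the ticket URL.
# This mirrors the Jira REST API issue payload implied by the
# ["fields"]["status"]["name"] lookup and by the issue URL recorded in
# log_N3C_response below; the values here are illustrative, not real data.
EXAMPLE_N3C_ISSUE = {
    "id": "10668",
    "key": "EXTDATASET-33",
    "fields": {
        "status": {
            "name": "Open"  # stored on the dataset as _n3c.status
        }
    }
}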
def log_N3C_response(_id, http_response):
    log_response(http_response)
    if http_response.code == 201:
        try:
            url = json.loads(http_response.body)["self"]
            # {
            #     "id": "10668",
            #     "key": "EXTDATASET-33",
            #     "self": "https://n3c-help.atlassian.net/rest/api/3/issue/10668"
            # }
            indices.refresh()
            dataset = ESDataset.get(_id)
            dataset.update(_n3c={"url": url})
        except Exception as exc:
            logging.error(str(exc))
def updateDocs():
    '''
    Give first-gen docs date_created and last_updated fields
    to match new-gen docs.
    '''
    docs = Dataset.search()
    for doc in docs.scan():
        meta = getattr(doc, "_meta", None)
        created = getattr(meta, "date_created", None)
        updated = getattr(meta, "last_updated", None)
        if not created:
            # partial update merges into the existing _meta object
            doc.update(**{"_meta": {"date_created": options.default_create}})
            logging.info(f"Updating date_created field for doc {doc.meta.id}")
        else:
            logging.info(f"Doc {doc.meta.id} already has created date {created}")
        if not updated:
            doc.update(**{"_meta": {"last_updated": options.default_last_updated}})
            logging.info(f"Updating last_updated field for doc {doc.meta.id}")
def main():
    parse_command_line()
    client = Elasticsearch(options.target_host)

    # create a new index named with a timestamp suffix
    datestring = ''.join(str(item) for item in datetime.now().timetuple()[:-1])
    index_name = INDEX_PREFIX + datestring
    _ = client.indices.create(index_name, {
        "settings": {
            "query": {
                "default_field": "all"
            },
            "default_pipeline": "resources-common",
            "analysis": {
                "normalizer": {
                    "keyword_lowercase_normalizer": {
                        "filter": ["lowercase"],
                        "type": "custom",
                        "char_filter": []
                    }
                },
                "analyzer": {
                    "string_lowercase": {
                        "filter": "lowercase",
                        "tokenizer": "keyword"
                    },
                    "whitespace_lowercase": {
                        "filter": "lowercase",
                        "tokenizer": "whitespace"
                    }
                }
            }
        },
        "mappings": {
            "_doc": {
                "properties": requests.get(MAPPING_URL).json(),
                "dynamic": False
            }
        }
    })
    logging.debug(_)

    # copy documents over, normalizing the @type field on the way
    for doc in Dataset.search().scan():
        dic = doc.to_json()
        if dic.get('@type') == 'outbreak:Dataset':
            dic['@type'] = 'Dataset'
        try:
            client.index(index_name, dic, id=doc.meta.id)
        except RequestError as err:
            logging.error(err.info)

    # switch the index alias over to the new index
    _ = client.indices.update_aliases({
        "actions": [{
            "remove": {
                "index": INDEX_PREFIX + '*',
                "alias": INDEX_ALIAS
            }
        }, {
            "add": {
                "index": index_name,
                "alias": INDEX_ALIAS
            }
        }]
    })
    logging.debug(_)

    # delete the old indices
    indices = list(client.indices.get(INDEX_PREFIX + '*').keys())
    indices.remove(index_name)
    indices = ','.join(indices)
    _ = client.indices.delete(indices)
    logging.info(_)
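# A minimal sketch of the command-line wiring main() assumes. The use of
# tornado.options is inferred from the parse_command_line() call above;
# the option name target_host comes from main(), but the default shown is
# a placeholder, not the project's real configuration.
from tornado.options import define

define("target_host", default="localhost:9200", type=str,
       help="Elasticsearch cluster that receives the rebuilt index")

if __name__ == "__main__":
    main()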