예제 #1
0
def setup():
    """Initialise each document type whose index is missing.

    For Schema, SchemaClass and Dataset, in that order, look up the index
    named by the class's ``Index.name`` and call the class's ``init()``
    only when that index does not exist yet.
    """
    for document in (Schema, SchemaClass, Dataset):
        if Index(document.Index.name).exists():
            # Already present: leave the existing index untouched.
            continue
        document.init()
예제 #2
0
def update_n3c_routine():
    """Call ``update_n3c_status`` for every dataset that has an ``_n3c.url``.

    The search only needs document ids, so ``_source`` is switched off and
    the hits are streamed with ``scan()``.
    """
    from discovery.data.dataset import Dataset

    search = (
        Dataset.search()
        .query("exists", field="_n3c.url")
        .source(False)
    )

    for hit in search.scan():
        update_n3c_status(hit.meta.id)
예제 #3
0
def reset():
    """Delete the Schema, SchemaClass and Dataset indices, then re-init them.

    Every index that currently exists is deleted first; only after all
    deletions does each document class have its ``init()`` called, in the
    same Schema, SchemaClass, Dataset order.
    """
    documents = (Schema, SchemaClass, Dataset)
    targets = [Index(document.Index.name) for document in documents]

    for target in targets:
        if target.exists():
            target.delete()

    for document in documents:
        document.init()
예제 #4
0
def update_n3c_status(_id):
    """Refresh the stored N3C status of one dataset from its N3C URL.

    Loads the dataset with id ``_id``, fetches the JSON document at its
    ``_n3c.url``, and writes back the URL, the value found at
    ``fields.status.name`` in that JSON, and the current UTC timestamp.

    This is best-effort: any failure (missing dataset, network error,
    non-success HTTP status, unexpected payload) is logged as a warning
    and the dataset is left unchanged. Nothing is raised to the caller.

    Args:
        _id: id of the dataset document to refresh.
    """
    import requests
    try:
        dataset = ESDataset.get(_id)
        url = dataset._n3c.url

        # Without a timeout a stalled server would block the caller
        # indefinitely; 30 seconds bounds each request.
        response = requests.get(url, timeout=30)
        # Surface HTTP errors as such instead of as a KeyError raised
        # while digging through an error payload.
        response.raise_for_status()
        status = response.json()["fields"]["status"]["name"]

        dataset.update(_n3c={
            "url": url,
            "status": status,
            "timestamp": datetime.now(timezone.utc)
        })
    except Exception as exc:
        logging.warning(str(exc))
예제 #5
0
def log_N3C_response(_id, http_response):
    """Log an N3C HTTP response and, on 201, store the returned URL.

    The response is always passed to ``log_response``. When its code is
    201, the ``self`` URL from the JSON body is saved on the dataset with
    id ``_id`` as ``_n3c.url``. Any error during that step is logged and
    not re-raised.
    """
    log_response(http_response)

    if http_response.code != 201:
        return

    try:
        # Example of a 201 response body:
        # {
        #   "id":"10668",
        #   "key":"EXTDATASET-33",
        #   "self":"https://n3c-help.atlassian.net/rest/api/3/issue/10668"
        # }
        payload = json.loads(http_response.body)
        issue_url = payload["self"]
        indices.refresh()
        record = ESDataset.get(_id)
        record.update(_n3c={"url": issue_url})
    except Exception as exc:
        logging.error(str(exc))
예제 #6
0
def updateDocs():
    """Backfill ``_meta.date_created`` and ``_meta.last_updated`` on datasets.

    Scans every Dataset document. A document missing ``date_created``
    receives ``options.default_create``; one missing ``last_updated``
    receives ``options.default_last_updated``. Fields that already hold a
    value are left alone and only reported in the log.
    """
    for doc in Dataset.search().scan():
        meta = getattr(doc, "_meta", None)
        created = getattr(meta, 'date_created', None)
        updated = getattr(meta, 'last_updated', None)
        # The document id lives in the hit metadata, not in the body.
        doc_id = doc.meta.id

        # Collect the missing fields so a single partial update covers both.
        missing = {}

        if created:
            logging.info(
                f"Doc {doc_id} already has created date {created}")
        else:
            missing['date_created'] = options.default_create
            logging.info(f'Updating date_created field for doc {doc_id}')

        if updated:
            logging.info(
                f"Doc {doc_id} already has last updated date {updated}")
        else:
            missing['last_updated'] = options.default_last_updated
            logging.info(f'Updating last_updated field for doc {doc_id}')

        if missing:
            # Write to the date fields themselves; the previous code wrote
            # both defaults into ``_meta.guide``.
            doc.update(**{'_meta': missing})
예제 #7
0
def main():
    """Rebuild the dataset index under a new name and repoint the alias.

    Steps, in order:

    1. Parse the command line and connect to ``options.target_host``.
    2. Create an index named ``INDEX_PREFIX`` plus a timestamp string,
       using the mapping properties downloaded from ``MAPPING_URL``.
    3. Copy every document whose ``@type`` is ``outbreak:Dataset`` into
       it, rewriting ``@type`` to ``Dataset``. Documents rejected with a
       ``RequestError`` are logged and skipped.
    4. Move ``INDEX_ALIAS`` from all ``INDEX_PREFIX*`` indices to the new
       index.
    5. Delete the remaining ``INDEX_PREFIX*`` indices, if there are any.
    """
    parse_command_line()
    client = Elasticsearch(options.target_host)

    # create index
    datestring = ''.join(str(item) for item in datetime.now().timetuple()[:-1])
    index_name = INDEX_PREFIX + datestring
    properties = requests.get(MAPPING_URL).json()
    created = client.indices.create(
        index_name, {
            "settings": {
                "query": {
                    "default_field": "all"
                },
                "default_pipeline": "resources-common",
                "analysis": {
                    "normalizer": {
                        "keyword_lowercase_normalizer": {
                            "filter": ["lowercase"],
                            "type": "custom",
                            "char_filter": []
                        }
                    },
                    "analyzer": {
                        "string_lowercase": {
                            "filter": "lowercase",
                            "tokenizer": "keyword"
                        },
                        "whitespace_lowercase": {
                            "filter": "lowercase",
                            "tokenizer": "whitespace"
                        }
                    }
                }
            },
            "mappings": {
                "_doc": {
                    "properties": properties,
                    "dynamic": False
                }
            }
        })
    logging.debug(created)

    # copy the matching documents into the new index
    for doc in Dataset.search().scan():
        dic = doc.to_json()
        if dic.get('@type') == 'outbreak:Dataset':
            dic['@type'] = 'Dataset'
            try:
                client.index(index_name, dic, id=doc.meta.id)
            except RequestError as err:
                logging.error(err.info)

    # switch index alias
    switched = client.indices.update_aliases({
        "actions": [{
            "remove": {
                "index": INDEX_PREFIX + '*',
                "alias": INDEX_ALIAS
            }
        }, {
            "add": {
                "index": index_name,
                "alias": INDEX_ALIAS,
            }
        }]
    })
    logging.debug(switched)

    # delete old indices
    stale = [
        name for name in client.indices.get(INDEX_PREFIX + '*')
        if name != index_name
    ]
    if stale:
        deleted = client.indices.delete(','.join(stale))
        logging.info(deleted)
    else:
        # With nothing left over, the joined name would be an empty string,
        # which is not a valid value for the required index argument.
        logging.info('No old indices to delete.')