Esempio n. 1
0
def download():
    """
    Download ACLED events year by year and index each one.

    For every year from 1997 through the current year, pages are requested
    from ``ACLED_URL`` (query parameters ``year`` and ``page``, starting at
    page 1). Each entry of the response's ``data`` list is passed through
    ``hydrate_event`` and then handed to ``es.index``. Paging for a year
    stops after the first response whose ``count`` is below 500.

    :return: None
    :raises requests.HTTPError: if a page request returns an error status
    """
    # Materialised as a list so the progress message shows the actual years.
    years = list(range(1997, datetime.date.today().year + 1))

    # Single-argument print calls produce the same text on Python 2 and 3.
    print("%s - downloading data for years: %s" % (datetime.datetime.today(), years))

    for year in years:
        page = 1
        while True:
            print("downloading %s page %s" % (year, page))
            res = requests.get(ACLED_URL, params={"year": year, "page": page})
            # Fail loudly on an HTTP error instead of trying to read an
            # error body as event data.
            res.raise_for_status()

            data = res.json()
            events = data['data']
            print("indexing on %s ..." % es.ES_ACLED_INDEX)

            for event in events:
                es.index(hydrate_event(event))

            # NOTE(review): 500 is presumably the API page size, so a
            # shorter page means this was the last one - confirm.
            if data['count'] < 500:
                break
            page += 1
Esempio n. 2
0
    def upsert(self, session=None):
        """
        Insert this instance, or update it in place if it was indexed before.

        Refreshes ``last_updated``, sends ``self.to_json()`` to ``es.index``
        and remembers the ``_id`` reported in the response.

        :param session: not used by this method
        :return: the response returned by ``es.index``
        """
        self.last_updated = datetime.datetime.now()

        index_args = {
            "index": dbname,
            "doc_type": self.doc_type,
            "body": self.to_json(),
        }
        # Without an id every call would index a brand-new document, so a
        # repeat call reuses the id stored by an earlier one. The first
        # call (no id known yet) is issued exactly as before.
        known_id = getattr(self, "_id", None)
        if known_id:
            index_args["id"] = known_id
        res = es.index(**index_args)

        returned_id = res.get("_id")
        if returned_id:
            self._id = returned_id
        return res
Esempio n. 3
0
    def upsert(self, session=None):
        """
        Insert this instance, or update it in place if it was indexed before.

        Refreshes ``last_updated``, sends ``self.to_json()`` to ``es.index``
        and remembers the ``_id`` reported in the response.

        :param session: not used by this method
        :return: the response returned by ``es.index``
        """
        self.last_updated = datetime.datetime.now()

        index_args = {
            "index": dbname,
            "doc_type": self.doc_type,
            "body": self.to_json(),
        }
        # Without an id every call would index a brand-new document, so a
        # repeat call reuses the id stored by an earlier one. The first
        # call (no id known yet) is issued exactly as before.
        known_id = getattr(self, "_id", None)
        if known_id:
            index_args["id"] = known_id
        res = es.index(**index_args)

        returned_id = res.get("_id")
        if returned_id:
            self._id = returned_id
        return res
Esempio n. 4
0
 def create(self):
     """
     Save a new instance to the DB for the first time.

     Stamps ``last_updated`` and ``created`` with the same timestamp,
     indexes ``self.to_json()`` and remembers the ``_id`` from the
     response, if it carries one.

     :return: the response returned by ``es.index``
     """
     now = datetime.datetime.now()
     self.last_updated = now
     self.created = now
     res = es.index(index=dbname, doc_type=self.doc_type, body=self.to_json())

     # The response used to be dropped, losing the id of the new document;
     # keep it on the instance and hand the response back to the caller.
     doc_id = res.get("_id")
     if doc_id:
         self._id = doc_id
     return res
Esempio n. 5
0
def index_one(id):
    """
    Index the single record whose primary key is ``id``.

    Opens an ``Elasticsearch`` client on ``host``, loads the record with
    ``Document.object.get(pk=id)``, converts it with ``get_doc`` and indexes
    the result into ``_index`` / ``_doc_type`` under the same ``id``.

    :param id: primary key of the record; reused as the document id
    """
    client = Elasticsearch(host)
    # NOTE(review): Django's default manager is ``objects`` - confirm that
    # ``Document.object`` is intentional.
    record = Document.object.get(pk=id)
    client.index(
        index=_index,
        doc_type=_doc_type,
        body=get_doc(record),
        id=id,
    )
Esempio n. 6
0
 def create(self):
     """
     Save a new instance to the DB for the first time.

     Stamps ``last_updated`` and ``created`` with the same timestamp,
     indexes ``self.to_json()`` and remembers the ``_id`` from the
     response, if it carries one.

     :return: the response returned by ``es.index``
     """
     now = datetime.datetime.now()
     self.last_updated = now
     self.created = now
     res = es.index(index=dbname, doc_type=self.doc_type, body=self.to_json())

     # The response used to be dropped, losing the id of the new document;
     # keep it on the instance and hand the response back to the caller.
     doc_id = res.get("_id")
     if doc_id:
         self._id = doc_id
     return res
Esempio n. 7
0
import es
import json
import glob
import time

# Rebuild the "messages" index from scratch.
es.delete_index("messages")
es.create_index("messages")

# Walk every .json file under data/ (recursively) and index its content.
nb = 0
for nb, filename in enumerate(glob.iglob('data/**/*.json', recursive=True), start=1):
    # Pause for one second on every 500th file.
    # NOTE(review): presumably throttling so the index can keep up - confirm.
    if not nb % 500:
        time.sleep(1)
    with open(filename, encoding="utf8") as f:
        item = json.load(f)
    # The file is already closed here; only the parsed content is needed.
    es.index("messages", "message", item)