def upload(client, fp, encoding=None, delimiter=','):
    """Upload the rows of a CSV file through ``client``.

    When ``encoding`` is given, the file is first converted to UTF-8 with the
    external ``iconv`` tool; the converted copy is written next to the
    original with a ``utf8-`` prefix on the file name and is the file that
    gets read.

    When the module-level ``FIRST_TIME`` flag is true, ``client.delete()`` is
    called and the field mapping is (re)applied before rows are inserted.

    Parameters:
        client: object providing ``delete()``, ``mapping_update(mapping)``
            and ``upsert(rows)``.
        fp: path of the CSV file to upload.
        encoding: source encoding handed to ``iconv -f``; falsy means the
            file is read as-is.
        delimiter: CSV field delimiter passed to ``csv.DictReader``.

    Raises:
        subprocess.CalledProcessError: if ``iconv`` exits with a non-zero
            status (the original ignored the status and went on to open a
            missing or stale output file).
    """
    # Local import so this block does not depend on the file's import header.
    import subprocess

    if encoding:
        # Prefix only the file name, so a path with a directory component
        # still points at an existing directory.
        csv_path = os.path.join(os.path.dirname(fp),
                                "utf8-" + os.path.basename(fp))
        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed through untouched.
        subprocess.check_call(
            ["iconv", "-f", encoding, "-t", "utf-8", fp,
             "--output", csv_path])
    else:
        csv_path = fp

    # The context manager closes the file once all rows have been consumed.
    with open(csv_path) as fo:
        reader = csv.DictReader(fo, delimiter=delimiter)
        if FIRST_TIME:
            try:
                client.delete()
                print("Delete done")
                client.mapping_update({
                    "properties": {
                        "orador": {
                            "type": "string",
                            "index": "not_analyzed"
                        },
                        "partido": {
                            "type": "string",
                            "index": "not_analyzed"
                        },
                        "estado": {
                            "type": "string",
                            "index": "not_analyzed"
                        },
                        "data": {
                            "type": "date",
                            "format": "dd/MM/YYYY"
                        }
                    }
                })
                print("Mapping done")
            except Exception as exc:
                # A string in an ``except`` clause never matches a raised
                # exception, so the original handler was dead code. Match on
                # the error text instead and let anything else propagate.
                if "HTTP Error 404" not in str(exc):
                    raise
                # NOTE(review): when delete() fails with a 404 the mapping
                # update above is skipped as well - confirm that is intended.
                print("Creating new database")
        print("Inserting rows")
        client.upsert(funkystuff(reader))
def upload():
    """Replace the uploaded data with the rows of the CSV file at ``fp``.

    Uses the module-level ``client``, ``mapping`` and ``fp``. The file is
    read with ``;`` as the field delimiter, and every row gets an ``id``
    field copied from its ``Deal Number`` column before being upserted.
    """
    # use Deal Number as unique id
    def add_id(dict_):
        dict_['id'] = dict_['Deal Number']
        return dict_

    # in general should not need to delete since have unique id
    client.delete()
    client.mapping_update(mapping)
    # Open the file in a context manager so the handle is closed once the
    # rows have been handed to the client (the original never closed it).
    with open(fp) as fo:
        client.upsert(map(add_id, csv.DictReader(fo, delimiter=';')))
def upload(client, fp, encoding=None, delimiter=','):
    """Upload the rows of a CSV file through ``client``.

    When ``encoding`` is given, the file is first converted to UTF-8 with the
    external ``iconv`` tool; the converted copy is written next to the
    original with a ``utf8-`` prefix on the file name and is the file that
    gets read.

    ``client.delete()`` is called and the field mapping is (re)applied
    before the rows are inserted.

    Parameters:
        client: object providing ``delete()``, ``mapping_update(mapping)``
            and ``upsert(rows)``.
        fp: path of the CSV file to upload.
        encoding: source encoding handed to ``iconv -f``; falsy means the
            file is read as-is.
        delimiter: CSV field delimiter passed to ``csv.DictReader``.

    Raises:
        subprocess.CalledProcessError: if ``iconv`` exits with a non-zero
            status (the original ignored the status and went on to open a
            missing or stale output file).
    """
    # Local import so this block does not depend on the file's import header.
    import subprocess

    if encoding:
        # Prefix only the file name, so a path with a directory component
        # still points at an existing directory.
        csv_path = os.path.join(os.path.dirname(fp),
                                "utf8-" + os.path.basename(fp))
        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed through untouched.
        subprocess.check_call(
            ["iconv", "-f", encoding, "-t", "utf-8", fp,
             "--output", csv_path])
    else:
        csv_path = fp

    # The context manager closes the file once all rows have been consumed.
    with open(csv_path) as fo:
        reader = csv.DictReader(fo, delimiter=delimiter)
        try:
            client.delete()
            print("Delete done")
            client.mapping_update({
                "properties": {
                    "Poder": {
                        "type": "string",
                        "index": "not_analyzed"
                    },
                    "Orgao": {
                        "type": "string",
                        "index": "not_analyzed"
                    },
                    "UO": {
                        "type": "string",
                        "index": "not_analyzed"
                    },
                    "Favorecido": {
                        "type": "string",
                        "index": "not_analyzed"
                    },
                    "Destino": {
                        "type": "string",
                        "index": "not_analyzed"
                    },
                    "Tipo": {
                        "type": "string",
                        "index": "not_analyzed"
                    },
                    "Motivo": {
                        "type": "string",
                        "index": "not_analyzed"
                    },
                    "Origem": {
                        "type": "string",
                        "index": "not_analyzed"
                    },
                    "DataFim": {
                        "type": "date",
                        "format": "date"
                    },
                    "DataInicio": {
                        "type": "date",
                        "format": "date"
                    }
                }
            })
            print("Mapping done")
        except Exception as exc:
            # A string in an ``except`` clause never matches a raised
            # exception, so the original handler was dead code. Match on the
            # error text instead and let anything else propagate.
            if "HTTP Error 404" not in str(exc):
                raise
            # NOTE(review): when delete() fails with a 404 the mapping update
            # above is skipped as well - confirm that is intended.
            print("Creating new database")
        print("Inserting rows")
        client.upsert(funkystuff(reader))
for a in assuntos: if projeto['id'] == str(a['TipoProj']) + '-' + str( a['NoProj']) + '-' + str(a['DataProj']): projeto['assuntos'].append(a['Assunto']) yield projeto #client.delete() #print "Delete done" client.mapping_update({ "properties": { "autores": { "type": "string", "analyzer": "keyword" }, "assuntos": { "type": "string", "analyzer": "keyword" }, "DataProj": { "type": "date", "format": "dd/MM/YYYY" } } }) print 'Mapping done' for row in funkystuff(projetos): client.upsert([row])
# Attach each author row to its project and count the rows that could not be
# attached. Per-row failure messages are deliberately not printed; only the
# total is reported.
# NOTE(review): `hex` is presumably a project helper shadowing the builtin -
# the builtin hex() raises TypeError for a str, which would count every row
# as an error. Confirm against the rest of the file.
autor_errors = 0
for a in autores:
    # str() keeps the id construction consistent with the subjects loop below.
    id_projeto = str(a['TipoProj']) + '-' + str(a['NoProj']) + '-' + str(a['DataProj'])
    try:
        lista_projetos[hex(id_projeto)]['autores'].append(a['Autor'])
    except Exception:
        # Best-effort import: count the failure and carry on. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit through.
        autor_errors += 1
print('Ocorreram ' + str(autor_errors) + ' erros na importacao dos autores')

# Same procedure for the subject ("assuntos") rows.
assuntos_errors = 0
for a in assuntos:
    id_projeto = str(a['TipoProj']) + '-' + str(a['NoProj']) + '-' + str(a['DataProj'])
    try:
        lista_projetos[hex(id_projeto)]['assuntos'].append(a['Assunto'])
    except Exception:
        assuntos_errors += 1
print('Ocorreram ' + str(assuntos_errors) + ' erros na importacao dos assuntos')


def funkystuff(reader):
    """Yield every item of ``reader`` unchanged, one at a time."""
    for p in reader:
        yield p


# NOTE(review): these two messages are printed although this snippet performs
# no delete and no mapping update - confirm that is intentional.
print("Delete done")
print('Mapping done')
client.upsert(funkystuff(projetos))
# Report how many subject ("assuntos") rows failed to import.
print("".join([
    'Ocorreram ',
    str(assuntos_errors),
    ' erros na importacao dos assuntos',
]))


def funkystuff(reader):
    """Yield every item of ``reader`` unchanged, one at a time."""
    for item in reader:
        yield item


# Call delete() on the client, then apply the field mapping and upload all
# projects through the pass-through generator above.
client.delete()
print("Delete done")
client.mapping_update(
    {
        "properties": {
            "autores": {"type": "string", "analyzer": "keyword"},
            "assuntos": {"type": "string", "analyzer": "keyword"},
            "DataProj": {"type": "date", "format": "dd/MM/YYYY"},
        }
    }
)
print('Mapping done')
client.upsert(funkystuff(projetos))
for p in reader: projeto = p projeto["id"] = p["TipoProj"] + "-" + p["NoProj"] + "-" + p["DataProj"] projeto["autores"] = [] projeto["assuntos"] = [] for a in autores: if projeto["id"] == a["TipoProj"] + "-" + a["NoProj"] + "-" + a["DataProj"]: projeto["autores"].append(a["Autor"]) for a in assuntos: if projeto["id"] == str(a["TipoProj"]) + "-" + str(a["NoProj"]) + "-" + str(a["DataProj"]): projeto["assuntos"].append(a["Assunto"]) yield projeto # client.delete() # print "Delete done" client.mapping_update( { "properties": { "autores": {"type": "string", "analyzer": "keyword"}, "assuntos": {"type": "string", "analyzer": "keyword"}, "DataProj": {"type": "date", "format": "dd/MM/YYYY"}, } } ) print "Mapping done" for row in funkystuff(projetos): client.upsert([row])