import datastore.client


def upload(fp):
    # fp: path to the census CSV file to upload
    dsurl = 'http://localhost:9200/ds/opendatacensus'
    # dsurl = 'http://datahub.io/api/data/1f7dbeab-b523-4fa4-b9ab-7cfc3bd5e9f7'
    client = datastore.client.DataStoreClient(dsurl)

    print 'Deleting'
    client.delete()
    print 'Done'

    # keep Dataset and Census Country as exact (not analyzed) strings so that
    # filtering on the full values works
    mapping = {
        'properties': {
            'Dataset': {'type': 'string', 'index': 'not_analyzed'},
            'Census Country': {'type': 'string', 'index': 'not_analyzed'},
        }
    }
    out = client.mapping_update(mapping)
    print out

    print 'Uploading to local'
    client.upload(fp)
    print 'Done'
import csv
import os

FIRST_TIME = True  # assumed module-level flag: rebuild the index on the first run


def upload(client, fp, encoding=None, delimiter=','):
    if encoding:
        # convert the source file to UTF-8 first (assumes fp is a plain
        # filename in the current directory)
        os.system("iconv -f " + encoding + " -t utf-8 " + fp + " --output utf8-" + fp)
        fo = open("utf8-" + fp)
    else:
        fo = open(fp)
    reader = csv.DictReader(fo, delimiter=delimiter)

    if FIRST_TIME:
        try:
            client.delete()
            print "Delete done"
            # keep categorical fields unanalyzed so exact values are searchable;
            # 'data' uses yyyy (calendar year), not YYYY (week year)
            client.mapping_update({
                "properties": {
                    "orador": {"type": "string", "index": "not_analyzed"},
                    "partido": {"type": "string", "index": "not_analyzed"},
                    "estado": {"type": "string", "index": "not_analyzed"},
                    "data": {"type": "date", "format": "dd/MM/yyyy"}
                }
            })
            print 'Mapping done'
        except Exception:
            # deleting a missing index returns HTTP Error 404; just create it
            print "Creating new database"

    print "Inserting rows"
    client.upsert(funkystuff(reader))  # funkystuff: pass-through row generator (sketched below)
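# funkystuff() is called by the upload() helpers above and below without being
# defined there; a later snippet in this collection defines it as a plain
# pass-through generator, which lets upsert() stream rows from csv.DictReader
# instead of loading the whole file into memory. A minimal sketch along those lines:
def funkystuff(reader):
    # yield each CSV row dict unchanged; per-row cleanup could be added here
    for p in reader:
        yield p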
import csv


def upload():
    # use Deal Number as the unique id, so re-uploads update existing rows
    def add_id(dict_):
        dict_['id'] = dict_['Deal Number']
        return dict_

    # in general there is no need to delete first, since rows have a unique id
    client.delete()
    client.mapping_update(mapping)
    # client, mapping and fp are assumed to be defined at module level
    client.upsert(map(add_id, csv.DictReader(open(fp), delimiter=';')))
import csv
import os


def upload(client, fp, encoding=None, delimiter=','):
    if encoding:
        # convert the source file to UTF-8 first (assumes fp is a plain filename)
        os.system("iconv -f " + encoding + " -t utf-8 " + fp + " --output utf8-" + fp)
        fo = open("utf8-" + fp)
    else:
        fo = open(fp)
    reader = csv.DictReader(fo, delimiter=delimiter)

    try:
        client.delete()
        print "Delete done"
        # categorical fields stay unanalyzed; DataInicio/DataFim use the
        # built-in 'date' format (yyyy-MM-dd)
        client.mapping_update({
            "properties": {
                "Poder": {"type": "string", "index": "not_analyzed"},
                "Orgao": {"type": "string", "index": "not_analyzed"},
                "UO": {"type": "string", "index": "not_analyzed"},
                "Favorecido": {"type": "string", "index": "not_analyzed"},
                "Destino": {"type": "string", "index": "not_analyzed"},
                "Tipo": {"type": "string", "index": "not_analyzed"},
                "Motivo": {"type": "string", "index": "not_analyzed"},
                "Origem": {"type": "string", "index": "not_analyzed"},
                "DataFim": {"type": "date", "format": "date"},
                "DataInicio": {"type": "date", "format": "date"}
            }
        })
        print 'Mapping done'
    except Exception:
        # deleting a missing index returns HTTP Error 404; just create it
        print "Creating new database"

    print "Inserting rows"
    client.upsert(funkystuff(reader))  # same pass-through generator as above
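# A hedged sketch of how this upload() helper might be driven, reusing the
# DataStoreClient construction from the first snippet; the index URL, file
# name and encoding below are hypothetical placeholders.
import datastore.client

dsurl = 'http://localhost:9200/ds/example-dataset'
client = datastore.client.DataStoreClient(dsurl)

# the source CSV is assumed to be Latin-1 encoded and semicolon-delimited,
# so upload() converts it to UTF-8 via iconv before reading it
upload(client, 'example.csv', encoding='iso-8859-1', delimiter=';')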
# attach each assunto (subject) to its project; count the ones that fail
assuntos_errors = 0
for a in assuntos:
    id_projeto = str(a['TipoProj']) + '-' + str(a['NoProj']) + '-' + str(a['DataProj'])
    try:
        # lista_projetos is assumed to be keyed by this composite id
        lista_projetos[id_projeto]['assuntos'].append(a['Assunto'])
    except KeyError:
        # print 'Failed to import assuntos for ' + id_projeto
        assuntos_errors += 1
print 'There were ' + str(assuntos_errors) + ' errors while importing assuntos'


def funkystuff(reader):
    # pass-through generator so upsert() can stream rows
    for p in reader:
        yield p


client.delete()
print "Delete done"
client.mapping_update({
    "properties": {
        "autores": {"type": "string", "analyzer": "keyword"},
        "assuntos": {"type": "string", "analyzer": "keyword"},
        # yyyy (calendar year), not YYYY (week year)
        "DataProj": {"type": "date", "format": "dd/MM/yyyy"}
    }
})
print 'Mapping done'
client.upsert(funkystuff(projetos))
import datastore.client


def upload():
    dsurl = 'http://datahub.io/api/data/ac5a28ea-eb52-4b0a-a399-5dcc1becf9d9'
    dsurl = 'http://localhost:9200/ds/rendition-on-record'  # the local URL overrides the datahub.io one
    client = datastore.client.DataStoreClient(dsurl)
    client.delete()
    # jsonout (the prepared records) is assumed to be built elsewhere in the script
    client.upload(jsonout)