Ejemplo n.º 1
0
def upload(client, fp, encoding=None, delimiter=','):
    """Load the CSV file ``fp`` into the datastore behind ``client``.

    When ``encoding`` is given, the file is first transcoded to UTF-8 with
    the external ``iconv`` tool; the result is written to ``"utf8-" + fp``
    and that copy is read instead of the original.

    If the module-level ``FIRST_TIME`` flag is truthy, the existing data is
    deleted and the field mapping is applied before any row is inserted.
    An error whose message contains ``"HTTP Error 404"`` during that step is
    treated as "nothing to delete yet"; any other error propagates.

    Args:
        client: object exposing ``delete()``, ``mapping_update(mapping)``
            and ``upsert(rows)``.
        fp: path of the CSV file to load.
        encoding: source encoding handed to ``iconv -f``; a falsy value
            means the file is read as-is.
        delimiter: CSV field delimiter.
    """
    # Function-scope import so this block does not rely on a file-level one.
    import subprocess

    if encoding:
        converted = "utf8-" + fp
        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters from being split or interpreted.
        subprocess.call(
            ["iconv", "-f", encoding, "-t", "utf-8", fp, "--output", converted]
        )
        fo = open(converted)
    else:
        fo = open(fp)

    try:
        reader = csv.DictReader(fo, delimiter=delimiter)

        if FIRST_TIME:
            try:
                client.delete()
                print("Delete done")

                client.mapping_update({
                    "properties": {
                        "orador": {"type": "string", "index": "not_analyzed"},
                        "partido": {"type": "string", "index": "not_analyzed"},
                        "estado": {"type": "string", "index": "not_analyzed"},
                        "data": {"type": "date", "format": "dd/MM/YYYY"},
                    }
                })
                print('Mapping done')
            except Exception as exc:
                # The original `except "HTTP Error 404":` could never match
                # (string exceptions are not catchable); match on the error
                # text instead and let everything else propagate.
                if "HTTP Error 404" not in str(exc):
                    raise
                print("Creating new database")

        print("Inserting rows")
        # The rows are produced lazily from the open file, so the file is
        # only closed once upsert() has returned.
        client.upsert(funkystuff(reader))
    finally:
        fo.close()
Ejemplo n.º 2
0
def upload():
    """Replace the datastore contents with the rows of the CSV file ``fp``.

    Uses the module-level names ``client``, ``mapping`` and ``fp``. The file
    is read with ``;`` as the field delimiter and every row gets an ``id``
    key copied from its ``Deal Number`` column before being upserted.
    """
    # use Deal Number as unique id
    def add_id(dict_):
        dict_['id'] = dict_['Deal Number']
        return dict_

    # in general should not need to delete since have unique id
    client.delete()
    client.mapping_update(mapping)
    # Read every row while the file is open, then close it deterministically
    # instead of leaking the handle.
    with open(fp) as fo:
        rows = [add_id(row) for row in csv.DictReader(fo, delimiter=';')]
    client.upsert(rows)
Ejemplo n.º 3
0
def upload():
    """Wipe the datastore, re-apply ``mapping`` and load the CSV at ``fp``.

    ``client``, ``mapping`` and ``fp`` are module-level names. Rows are
    parsed with ``;`` as delimiter; each one is given an ``id`` equal to its
    ``Deal Number`` value and the whole list is passed to ``client.upsert``.
    """
    # use Deal Number as unique id
    def add_id(dict_):
        dict_['id'] = dict_['Deal Number']
        return dict_

    # in general should not need to delete since have unique id
    client.delete()
    client.mapping_update(mapping)

    # The context manager closes the file handle, which the original
    # `open(fp)` expression never did.
    with open(fp) as source:
        records = [add_id(record)
                   for record in csv.DictReader(source, delimiter=';')]
    client.upsert(records)
Ejemplo n.º 4
0
def upload():
    """Reset the local ``opendatacensus`` datastore and upload ``fp`` to it.

    Steps, in order: delete the existing data, declare ``Dataset`` and
    ``Census Country`` as ``string`` fields with ``index: not_analyzed``,
    print the response of the mapping update, then upload the file named by
    the module-level ``fp``. Progress messages are printed along the way.
    """
    import datastore.client

    # Alternative remote endpoint:
    # dsurl = 'http://datahub.io/api/data/1f7dbeab-b523-4fa4-b9ab-7cfc3bd5e9f7'
    dsurl = 'http://localhost:9200/ds/opendatacensus'
    store = datastore.client.DataStoreClient(dsurl)

    # Both columns share the same field definition.
    exact_match_fields = ('Dataset', 'Census Country')
    field_mapping = {
        'properties': dict(
            (field, {'type': 'string', 'index': 'not_analyzed'})
            for field in exact_match_fields
        )
    }

    print('Deleting')
    store.delete()
    print('Done')

    response = store.mapping_update(field_mapping)
    print(response)

    print('Uploading to local')
    store.upload(fp)
    print('Done')
Ejemplo n.º 5
0
def upload():
    """Clear the ``opendatacensus`` datastore on localhost and re-upload ``fp``.

    The existing data is deleted, a mapping declaring ``Dataset`` and
    ``Census Country`` as ``string`` / ``not_analyzed`` fields is sent (its
    response is printed), and finally the file named by the module-level
    ``fp`` is uploaded.
    """
    import datastore.client

    # dsurl = 'http://datahub.io/api/data/1f7dbeab-b523-4fa4-b9ab-7cfc3bd5e9f7'
    dsurl = 'http://localhost:9200/ds/opendatacensus'
    ds = datastore.client.DataStoreClient(dsurl)

    print('Deleting')
    ds.delete()
    print('Done')

    # Build one independent definition per column.
    properties = {}
    for column in ('Dataset', 'Census Country'):
        properties[column] = {'type': 'string', 'index': 'not_analyzed'}
    schema = {'properties': properties}

    result = ds.mapping_update(schema)
    print(result)

    print('Uploading to local')
    ds.upload(fp)
    print('Done')
Ejemplo n.º 6
0
def upload(client, fp, encoding=None, delimiter=','):
    """Reset the datastore behind ``client`` and load the CSV file ``fp``.

    When ``encoding`` is given, the file is first transcoded to UTF-8 with
    the external ``iconv`` tool; the result is written to ``"utf8-" + fp``
    and that copy is read instead of the original.

    Existing data is deleted and the field mapping is applied before rows
    are inserted. An error whose message contains ``"HTTP Error 404"``
    during that step is treated as "nothing to delete yet"; any other error
    propagates.

    Args:
        client: object exposing ``delete()``, ``mapping_update(mapping)``
            and ``upsert(rows)``.
        fp: path of the CSV file to load.
        encoding: source encoding handed to ``iconv -f``; a falsy value
            means the file is read as-is.
        delimiter: CSV field delimiter.
    """
    # Function-scope import so this block does not rely on a file-level one.
    import subprocess

    if encoding:
        converted = "utf8-" + fp
        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters from being split or interpreted.
        subprocess.call(
            ["iconv", "-f", encoding, "-t", "utf-8", fp, "--output", converted]
        )
        fo = open(converted)
    else:
        fo = open(fp)

    try:
        reader = csv.DictReader(fo, delimiter=delimiter)

        # Columns indexed verbatim (string, not_analyzed).
        keyword = {"type": "string", "index": "not_analyzed"}
        properties = {}
        for column in ("Poder", "Orgao", "UO", "Favorecido",
                       "Destino", "Tipo", "Motivo", "Origem"):
            properties[column] = dict(keyword)
        for column in ("DataFim", "DataInicio"):
            properties[column] = {"type": "date", "format": "date"}

        try:
            client.delete()
            print("Delete done")

            client.mapping_update({"properties": properties})
            print('Mapping done')
        except Exception as exc:
            # The original `except "HTTP Error 404":` could never match
            # (string exceptions are not catchable); match on the error
            # text instead and let everything else propagate.
            if "HTTP Error 404" not in str(exc):
                raise
            print("Creating new database")

        print("Inserting rows")
        # The rows are produced lazily from the open file, so the file is
        # only closed once upsert() has returned.
        client.upsert(funkystuff(reader))
    finally:
        fo.close()
        for a in assuntos:
            if projeto['id'] == str(a['TipoProj']) + '-' + str(
                    a['NoProj']) + '-' + str(a['DataProj']):
                projeto['assuntos'].append(a['Assunto'])
        yield projeto


#client.delete()
#print "Delete done"

# Declare the field mapping: author and subject lists use the "keyword"
# analyzer, and DataProj is a date in dd/MM/YYYY form.
client.mapping_update(dict(
    properties=dict(
        autores=dict(type="string", analyzer="keyword"),
        assuntos=dict(type="string", analyzer="keyword"),
        DataProj=dict(type="date", format="dd/MM/YYYY"),
    )
))
print('Mapping done')

# Send the projects one at a time, each wrapped in a single-element list.
for row in funkystuff(projetos):
    client.upsert([row])
# Attach every subject ("assunto") to its project and count the rows that
# could not be attached.
assuntos_errors = 0
for a in assuntos:
    # Project key: "<TipoProj>-<NoProj>-<DataProj>".
    id_projeto = str(a['TipoProj']) + '-' + str(a['NoProj']) + '-' + str(a['DataProj'])
    try:
        # NOTE(review): id_projeto is a str and hex() raises TypeError for a
        # str, so as written this lookup fails for every row and each one is
        # counted as an error. The intended key function (hash()? the plain
        # id?) is not visible here -- confirm against how lista_projetos is
        # built before changing it.
        lista_projetos[hex(id_projeto)]['assuntos'].append(a['Assunto'])
    except Exception:
        # Best effort: count the failure and carry on. Catching Exception
        # rather than using a bare except lets KeyboardInterrupt and
        # SystemExit still stop the import.
        #print 'Falha ao importar assuntos de ' + id_projeto
        assuntos_errors += 1

print('Ocorreram ' + str(assuntos_errors) + ' erros na importacao dos assuntos')

def funkystuff(reader):
    """Lazily yield every item of ``reader``, unchanged and in order."""
    for record in reader:
        yield record

# Start from an empty datastore.
client.delete()
print('Delete done')

# Author and subject lists use the "keyword" analyzer; DataProj is a date
# written as dd/MM/YYYY.
client.mapping_update({
    'properties': {
        'autores': {
            'type': 'string',
            'analyzer': 'keyword',
        },
        'assuntos': {
            'type': 'string',
            'analyzer': 'keyword',
        },
        'DataProj': {
            'type': 'date',
            'format': 'dd/MM/YYYY',
        },
    },
})
print('Mapping done')

# Hand the whole project stream to a single upsert call.
client.upsert(funkystuff(projetos))