Example #1
0
def upload(client, fp, encoding=None, delimiter=','):
    """Load the CSV file *fp* into the store behind *client*.

    When the module-level ``FIRST_TIME`` flag is true, the existing data is
    deleted and the field mapping is (re)declared before the rows are sent.

    Args:
        client: object exposing ``delete()``, ``mapping_update(dict)`` and
            ``upsert(iterable)``.
        fp: path of the CSV file to load.
        encoding: source encoding of *fp*.  When given, the file is first
            converted to UTF-8 with the external ``iconv`` tool; the copy is
            written next to the original as ``utf8-<name>``.
        delimiter: CSV field separator.

    Raises:
        subprocess.CalledProcessError: if ``iconv`` exits with an error.
        Exception: any error from *client* other than an "HTTP Error 404"
            raised during the delete/mapping step.
    """
    if encoding:
        # Imported locally: only the transcoding branch needs it.
        import subprocess

        # Prefix only the file name; prefixing the whole path would point
        # into a non-existent "utf8-<dir>" directory for nested paths.
        directory, filename = os.path.split(fp)
        source_path = os.path.join(directory, "utf8-" + filename)
        # Argument list instead of a shell string, so spaces or shell
        # metacharacters in fp/encoding cannot alter the command.
        subprocess.check_call(
            ["iconv", "-f", encoding, "-t", "utf-8", fp,
             "--output", source_path])
    else:
        source_path = fp

    # The file stays open until upsert() has consumed the reader, then is
    # closed deterministically.
    with open(source_path) as fo:
        reader = csv.DictReader(fo, delimiter=delimiter)

        if FIRST_TIME:
            try:
                client.delete()
                print("Delete done")

                client.mapping_update({
                    "properties": {
                        "orador": {"type": "string", "index": "not_analyzed"},
                        "partido": {"type": "string", "index": "not_analyzed"},
                        "estado": {"type": "string", "index": "not_analyzed"},
                        "data": {"type": "date", "format": "dd/MM/YYYY"},
                    }
                })
                print("Mapping done")
            except Exception as err:
                # A string in an except clause never matches anything, so
                # the 404 case is recognised by its message instead; every
                # other failure is re-raised untouched.
                # NOTE(review): on a 404 the mapping update is skipped, as
                # in the original control flow -- confirm that is intended.
                if "HTTP Error 404" not in str(err):
                    raise
                print("Creating new database")

        print("Inserting rows")
        client.upsert(funkystuff(reader))
Example #2
0
def upload():
    """Reload the ';'-delimited CSV at ``fp`` through the module-level client.

    Reads the module-level names ``client``, ``mapping`` and ``fp``: the
    existing data is deleted, ``mapping`` is applied, and every CSV row is
    upserted with an added ``id`` field.
    """
    # use Deal Number as unique id
    def add_id(dict_):
        dict_['id'] = dict_['Deal Number']
        return dict_

    # in general should not need to delete since have unique id
    client.delete()
    client.mapping_update(mapping)
    # Open the file in a with-block so the handle is closed once upsert()
    # has consumed the rows, instead of being left to the garbage collector.
    with open(fp) as source:
        client.upsert(map(add_id, csv.DictReader(source, delimiter=';')))
Example #3
0
def upload():
    """Delete, re-map and refill the store from the CSV file named by ``fp``.

    Works on the module-level ``client``, ``mapping`` and ``fp``.  The CSV
    is parsed with ';' as delimiter and each row gets an ``id`` field before
    being upserted.
    """
    # use Deal Number as unique id
    def add_id(dict_):
        dict_['id'] = dict_['Deal Number']
        return dict_

    # in general should not need to delete since have unique id
    client.delete()
    client.mapping_update(mapping)

    # The context manager guarantees the CSV handle is closed after the
    # upload; the original opened it inline and never closed it.
    with open(fp) as csv_file:
        reader = csv.DictReader(csv_file, delimiter=';')
        client.upsert(map(add_id, reader))
Example #4
0
def upload(client, fp, encoding=None, delimiter=','):
    """Replace the data behind *client* with the rows of the CSV file *fp*.

    The existing data is deleted, the field mapping is declared, and then
    every CSV row is upserted.

    Args:
        client: object exposing ``delete()``, ``mapping_update(dict)`` and
            ``upsert(iterable)``.
        fp: path of the CSV file to load.
        encoding: source encoding of *fp*.  When given, the file is first
            converted to UTF-8 with the external ``iconv`` tool; the copy is
            written next to the original as ``utf8-<name>``.
        delimiter: CSV field separator.

    Raises:
        subprocess.CalledProcessError: if ``iconv`` exits with an error.
        Exception: any error from *client* other than an "HTTP Error 404"
            raised during the delete/mapping step.
    """
    if encoding:
        # Imported locally: only the transcoding branch needs it.
        import subprocess

        # Prefix only the file name so paths with a directory part still
        # resolve to an existing directory.
        directory, filename = os.path.split(fp)
        source_path = os.path.join(directory, "utf8-" + filename)
        # Argument list rather than a shell string: nothing in fp or
        # encoding is interpreted by a shell.
        subprocess.check_call(
            ["iconv", "-f", encoding, "-t", "utf-8", fp,
             "--output", source_path])
    else:
        source_path = fp

    # Keep the file open only for the duration of the upload.
    with open(source_path) as fo:
        reader = csv.DictReader(fo, delimiter=delimiter)

        not_analyzed = {"type": "string", "index": "not_analyzed"}
        try:
            client.delete()
            print("Delete done")

            client.mapping_update({
                "properties": {
                    # dict(...) gives every field its own copy of the spec.
                    "Poder": dict(not_analyzed),
                    "Orgao": dict(not_analyzed),
                    "UO": dict(not_analyzed),
                    "Favorecido": dict(not_analyzed),
                    "Destino": dict(not_analyzed),
                    "Tipo": dict(not_analyzed),
                    "Motivo": dict(not_analyzed),
                    "Origem": dict(not_analyzed),
                    "DataFim": {"type": "date", "format": "date"},
                    "DataInicio": {"type": "date", "format": "date"},
                }
            })
            print("Mapping done")
        except Exception as err:
            # `except "HTTP Error 404"` can never match (string exceptions
            # are not catchable), so the 404 case is detected from the
            # error message; anything else propagates unchanged.
            # NOTE(review): on a 404 the mapping update is skipped, as in
            # the original control flow -- confirm that is intended.
            if "HTTP Error 404" not in str(err):
                raise
            print("Creating new database")

        print("Inserting rows")
        client.upsert(funkystuff(reader))
        for a in assuntos:
            if projeto['id'] == str(a['TipoProj']) + '-' + str(
                    a['NoProj']) + '-' + str(a['DataProj']):
                projeto['assuntos'].append(a['Assunto'])
        yield projeto


# The delete step (client.delete() and its "Delete done" message) is
# commented out in this variant.

# Declare the field mapping before any rows are sent.
client.mapping_update({
    "properties": {
        "autores": {"type": "string", "analyzer": "keyword"},
        "assuntos": {"type": "string", "analyzer": "keyword"},
        "DataProj": {"type": "date", "format": "dd/MM/YYYY"},
    }
})
print('Mapping done')

# Send the projects one per call, each wrapped in a single-item list.
for row in funkystuff(projetos):
    client.upsert([row])
# Attach every author row to its project entry in lista_projetos and count
# the rows that could not be attached.
# NOTE(review): hex() does not accept a string, so with the string ids built
# below every lookup raises and is counted as an error.  hash() or the plain
# id may have been intended -- confirm against the code that fills
# lista_projetos (not visible here) before changing the key.
autor_errors = 0
for a in autores:
    id_projeto = a['TipoProj'] + '-' + a['NoProj'] + '-' + a['DataProj']
    try:
        lista_projetos[hex(id_projeto)]['autores'].append(a['Autor'])
    except Exception:
        # Best-effort import: count the failure and continue.  Catching
        # Exception rather than using a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        autor_errors += 1

print('Ocorreram ' + str(autor_errors) + ' erros na importacao dos autores')

# Same procedure for the subject rows; here the id parts are passed through
# str() before being joined.
assuntos_errors = 0
for a in assuntos:
    id_projeto = str(a['TipoProj']) + '-' + str(a['NoProj']) + '-' + str(a['DataProj'])
    try:
        lista_projetos[hex(id_projeto)]['assuntos'].append(a['Assunto'])
    except Exception:
        # Best-effort, as above.
        assuntos_errors += 1

print('Ocorreram ' + str(assuntos_errors) + ' erros na importacao dos assuntos')

def funkystuff(reader):
    """Yield each item of *reader* unchanged, in iteration order."""
    for record in reader:
        yield record

# NOTE(review): both status messages are printed although no delete or
# mapping call is visible in this snippet -- confirm against the full script.
print("Delete done")
print('Mapping done')

# Hand all projects to the client in a single call.
client.upsert(funkystuff(projetos))
Example #7
0
# Print the assuntos_errors count inside the summary message.
print('Ocorreram ' + str(assuntos_errors) + ' erros na importacao dos assuntos')


def funkystuff(reader):
    """Generator that passes the items of *reader* through one by one."""
    for item in reader:
        yield item


# Delete through the client before the mapping and the rows are sent.
client.delete()
print("Delete done")

# Declare the field mapping.
client.mapping_update({
    "properties": {
        "autores": {"type": "string", "analyzer": "keyword"},
        "assuntos": {"type": "string", "analyzer": "keyword"},
        "DataProj": {"type": "date", "format": "dd/MM/YYYY"},
    }
})
print('Mapping done')

# Upload every project in a single upsert call.
client.upsert(funkystuff(projetos))
    for p in reader:
        projeto = p
        projeto["id"] = p["TipoProj"] + "-" + p["NoProj"] + "-" + p["DataProj"]
        projeto["autores"] = []
        projeto["assuntos"] = []
        for a in autores:
            if projeto["id"] == a["TipoProj"] + "-" + a["NoProj"] + "-" + a["DataProj"]:
                projeto["autores"].append(a["Autor"])
        for a in assuntos:
            if projeto["id"] == str(a["TipoProj"]) + "-" + str(a["NoProj"]) + "-" + str(a["DataProj"]):
                projeto["assuntos"].append(a["Assunto"])
        yield projeto


# client.delete() and its "Delete done" message are commented out in this
# variant, so no delete is issued here.

# Declare the field mapping before the rows are sent.
client.mapping_update({
    "properties": {
        "autores": {
            "type": "string",
            "analyzer": "keyword",
        },
        "assuntos": {
            "type": "string",
            "analyzer": "keyword",
        },
        "DataProj": {
            "type": "date",
            "format": "dd/MM/YYYY",
        },
    },
})
print("Mapping done")

# One upsert call per project, each carrying a single-item list.
for row in funkystuff(projetos):
    client.upsert([row])