def relatePlaces(structure):
    """Relate a person to the place(s) they were born in.

    ``structure['site']['birth']`` is a list of strings that mixes place
    URLs with date entries.  An entry is treated as a date when the first
    "_"-separated token of its last "/"-separated segment is numeric:

    * a single token is stored as ``'year'``;
    * with several tokens the first one is stored as ``'day'`` and the
      third one as ``'moth'`` (presumably "<day>_de_<month>" -- confirm
      against the scraper output).

    The collected date properties are attached to every 'born' relation
    that gets created.  Place entries containing "http" are scraped and
    saved as 'site' nodes when missing, then related to the person.

    Returns None.
    NOTE(review): the sibling ``relate*`` functions return the list of
    newly scraped entities; this one never returned anything, so that
    contract is kept here.
    """
    dic = {}
    sites = []
    for cosa in structure['site']['birth']:
        tokens = cosa.split("/")[-1].split("_")
        if not tokens[0].isdigit():
            # Not a date: keep it as a candidate place, in original order.
            sites.append(cosa)
            continue
        if len(tokens) == 1:
            dic['year'] = tokens[0]
        else:
            dic['day'] = tokens[0]
            # 'moth' (sic) is the property name already used for stored
            # relations, so the spelling is kept on purpose.
            # A two-token entry has no third token; it used to raise
            # IndexError, now only the day is recorded.
            if len(tokens) > 2:
                dic['moth'] = tokens[2]

    for site in sites:
        if "http" not in site:
            continue
        # Check the same label the node is saved and related under
        # ('site'); the check previously used 'Site'.
        if not CM.exists('site', {'Url': site}):
            scrap_site = politic_scrapeTable(site)
            saveNode(create_structure_site(scrap_site), "site")
        if not CM.existsRelation('born', {}, 'person',
                                 {'Url': structure['person']['Url']},
                                 'site', {'Url': site}):
            CM.makeRelation('born', dic, 'person',
                            {'Url': structure['person']['Url']},
                            'site', {'Url': site})
def relateOrganizationsAcademic(structure):
    """Relate a person to the institutions listed as academic background.

    For every URL in ``structure['organization']['academic']``:

    * if no 'institution' node with that Url exists, the page is scraped,
      turned into an institution structure and saved;
    * a 'studiedAt' relation person -> institution is created unless one
      already exists.

    Returns the list of institution structures built during this call.
    """
    noCreated = []
    relation = 'studiedAt'
    node = 'institution'
    for key in structure['organization']['academic']:
        # The existence check must use the label the node is saved and
        # related under; it previously checked 'organization'.
        if not CM.exists(node, {'Url': key}):
            scrap = create_structure_institution(politic_scrapeTable(key))
            saveNode(scrap, node)
            noCreated.append(scrap)
        if not CM.existsRelation(relation, {}, 'person',
                                 {'Url': structure['person']['Url']},
                                 node, {'Url': key}):
            CM.makeRelation(relation, {}, 'person',
                            {'Url': structure['person']['Url']},
                            node, {'Url': key})
    return noCreated
def relateParty(structure):
    """Relate a person to every party URL in ``structure['party']['name']``.

    Parties without a stored 'party' node are scraped, converted with
    ``create_structure_party`` and saved first.  A 'belongsTo' relation
    person -> party is then created unless it already exists.

    Returns the list of party structures built during this call.
    """
    label = 'party'
    rel_type = 'belongsTo'
    created = []
    for party_url in structure['party']['name']:
        already_stored = CM.exists(label, {'Url': party_url})
        if not already_stored:
            scraped = politic_scrapeTable(party_url)
            party_node = create_structure_party(scraped)
            saveNode(party_node, label)
            created.append(party_node)

        linked = CM.existsRelation(
            rel_type, {},
            'person', {'Url': structure['person']['Url']},
            label, {'Url': party_url})
        if linked:
            continue
        CM.makeRelation(
            rel_type, {},
            'person', {'Url': structure['person']['Url']},
            label, {'Url': party_url})
    return created
def relatedFamily(structure, stored=None, newFamily=None):
    """Relate a person to their relatives and store relatives not seen yet.

    ``structure['family']`` maps a kinship type to a list of entries; only
    entries containing 'http' are processed.  Relatives without a 'person'
    node are scraped, cleaned and saved, and their raw scrape is appended
    to the family list.  A 'family' relation carrying ``{'type': <kinship>}``
    is created when missing.  Finally ``searchFamily`` is called with the
    structure, ``stored`` and the family list.

    Args:
        structure: scraped person structure with 'family' and 'person'.
        stored: list handed through to ``searchFamily``; a fresh list is
            used when omitted.
        newFamily: list that newly scraped relatives are appended to, in
            place; a fresh list is used when omitted.

    Returns:
        The family list (the same object as ``newFamily`` when given).
    """
    # The defaults used to be literal lists, which Python creates once and
    # shares between calls: relatives from earlier calls leaked into later
    # results.  Explicitly passed lists are still mutated in place.
    if stored is None:
        stored = []
    family = [] if newFamily is None else newFamily

    for key in structure['family']:
        for person in structure['family'][key]:
            if 'http' not in person:
                continue
            if not CM.exists('person', {'Url': person}):
                scrap_person = politic_scrapeTable(person)
                family.append(scrap_person)
                clean_person = cleanStructure(create_structure(scrap_person))
                savePerson(clean_person)
            if not CM.existsRelation('family', {'type': key}, 'person',
                                     {'Url': structure['person']['Url']},
                                     'person', {'Url': person}):
                CM.makeRelation('family', {'type': key}, 'person',
                                {'Url': structure['person']['Url']},
                                'person', {'Url': person})
    searchFamily(structure, stored, family)
    return family
def _pad_date_parts(pieces):
    """Pad a ' de '-split date to [day, month, year], filling gaps with '1'."""
    if len(pieces) == 3:
        return list(pieces)
    if len(pieces) == 2:
        return ['1', pieces[0], pieces[1]]
    # One piece, or an unexpected number of pieces: the first one is taken
    # as the year.
    return ['1', '1', pieces[0]]


def _julian_or_default(parts, default):
    """Return str(gregToJul(day, month, year)), or ``default`` on failure."""
    try:
        return str(gregToJul(parts[0], parts[1], parts[2]))
    except Exception:
        # Best effort: a date that cannot be converted must not abort the
        # whole import.
        return default


def _laboral_relation_properties(key):
    """Build the 'worksAt' relation properties from one scraped job row."""
    properties = {}
    properties['Cargo'] = key['Cargo']

    # 'Perido del cargo' (sic) is the key produced by the scraper; the
    # value looks like "<start> - <end>" with the end being optional.
    properties['Inicio'] = key['Perido del cargo'].split(
        '-')[0].strip().replace(u'\xba', '').replace(
            ' del ', ' de ').replace('de de ', '')
    dateInicio = _pad_date_parts(
        properties['Inicio'].replace('Desde el ', '').split(' de '))
    properties['InicioJul'] = _julian_or_default(dateInicio, '0')

    period_pieces = key['Perido del cargo'].replace(u'\xba', '').split('-')
    properties['Fin'] = (
        period_pieces[1].strip() if len(period_pieces) > 1 else "")
    if properties['Fin'] != "":
        dateFin = _pad_date_parts(
            properties['Fin'].replace(' del ', ' de ').replace(
                'de de ', '').split(' de '))
        # NOTE(review): the original also carried '31'/'12' fallbacks for
        # an incomplete end date, but they could never trigger because the
        # parts were already padded with '1'.  Results are unchanged here;
        # confirm whether "end of year" was the intended default.
        properties['FinJul'] = _julian_or_default(dateFin, "0")
    else:
        # No end date: sentinel value used for an open-ended period.
        properties['FinJul'] = "5000000"
    return properties


def relateOrganizationsLaboral(structure):
    """Relate a person to the organizations listed in their work history.

    Each row of ``structure['organization']['laboral']`` that has a
    non-empty 'Entidad' list is linked through the first entity's 'url':
    the organization is scraped and saved when no 'organization' node with
    that Url exists, and a 'worksAt' relation is created when missing.  The
    relation carries 'Cargo', 'Inicio', 'InicioJul', 'Fin' and 'FinJul'.

    Returns the list of raw scrapes of the organizations fetched during
    this call.
    """
    noCreated = []
    relation = 'worksAt'
    node = 'organization'
    for key in structure['organization']['laboral']:
        # dict.has_key() does not exist on Python 3; .get() works on both
        # and also skips rows whose 'Entidad' is empty.
        entities = key.get('Entidad')
        if not entities:
            continue
        url = entities[0]['url']
        if not CM.exists(node, {'Url': url}):
            scrap = politic_scrapeTable(url)
            saveNode(create_structure_organization(scrap), node)
            noCreated.append(scrap)
        if not CM.existsRelation(relation, {}, 'person',
                                 {'Url': structure['person']['Url']},
                                 node, {'Url': url}):
            properties = _laboral_relation_properties(key)
            CM.makeRelation(relation, properties, 'person',
                            {'Url': structure['person']['Url']},
                            node, {'Url': url})
    return noCreated
def saveNode(structure, label):
    """Create a ``label`` node from ``structure`` unless it is already stored.

    A node counts as stored when ``CM.exists`` finds one with the same
    'Url'.  Returns True in that case, otherwise the result of
    ``CM.create``.
    """
    if CM.exists(label, {'Url': structure['Url']}):
        return True
    return CM.create(label, structure)
def savePerson(structure):
    """Create a 'person' node from ``structure['person']`` unless it exists.

    A person counts as stored when ``CM.exists`` finds a 'person' node with
    the same 'Url'.  Returns False in that case, otherwise the result of
    ``CM.create``.
    """
    person = structure['person']
    if CM.exists('person', {'Url': person['Url']}):
        return False
    return CM.create('person', structure['person'])