def coleta_to_dict(self):
    """
    Convert coleta object to Python dict
    :return: dict representation of the coleta
    """
    return conv.document2dict(self.base, self)
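# The *_to_dict wrappers in this module all delegate to conv.document2dict,
# which maps a liblightbase document object onto a plain dict. A minimal
# usage sketch; the `coleta` argument is an assumption for illustration,
# not an object defined in this source.
def example_coleta_serialization(coleta):
    import json
    coleta_dict = coleta.coleta_to_dict()
    return json.dumps(coleta_dict)  # plain dicts serialize directly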
def process_status(self, status_id_doc):
    """
    Process status document
    :param status_id_doc: Status id_doc
    :return: status dict if it should be added to analytics, None otherwise
    """
    try:
        result = self.status_base.get_document(status_id_doc)
    except ConnectionError as e:
        log.error("CONNECTION ERROR: Error processing %s\n%s",
                  status_id_doc, e.message)
        time.sleep(1)
        result = self.status_base.get_document(status_id_doc)

    # JSON
    status_dict = conv.document2dict(self.status_base.lbbase, result)

    # Manually add id_doc
    status_dict['_metadata'] = dict()
    status_dict['_metadata']['id_doc'] = status_id_doc

    # Add status to analytics when it carries positives or negatives
    update = False
    if status_dict.get('positives') is not None or \
            status_dict.get('negatives') is not None:
        update = True

    if update:
        return status_dict
    else:
        return None
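# Hedged usage sketch for process_status: iterate over the stored status ids
# and collect only the documents flagged for analytics (a None return means
# the status carried neither positives nor negatives). The `processor`
# object and its status_base are assumptions, not defined in this source.
def example_collect_for_analytics(processor):
    collected = []
    for status_id_doc in processor.status_base.get_document_ids():
        status_dict = processor.process_status(status_id_doc)
        if status_dict is not None:
            collected.append(status_dict)
    return collected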
def orgao_to_dict(self):
    """
    Convert orgao object to Python dict
    :return: dict representation of the orgao
    """
    return conv.document2dict(orgao_base.lbbase, self)
def user_to_dict(self):
    """
    Convert user object to Python dict
    :return: dict representation of the user
    """
    return conv.document2dict(user_base.lbbase, self)
def atividade_to_dict(self):
    """
    Convert atividade object to Python dict
    :return: dict representation of the atividade
    """
    return conv.document2dict(atividade_base.lbbase, self)
def twitter_embed(self):
    """
    View for twitter embed
    :return: Twitter HTML code with oEmbed
    """
    status_id = self.request.matchdict.get('status_id')
    if status_id is None:
        log.error("You have to supply status_id")
        raise HTTPError

    status = self.status_base.get_document(status_id)
    status_dict = conv.document2dict(self.status_base.lbbase, status)
    status_dict['_metadata'] = dict()
    status_dict['_metadata']['id_doc'] = status_id

    # Load original source
    source = json.loads(status_dict['source'])
    status_dict['source'] = source[0]

    # Get status oembed
    oembed = self.lbt.api.GetStatusOembed(id=status_dict['source']['_id'],
                                          lang='pt')

    # Get category from event tokens when available, otherwise fall back
    # to the search term
    if status_dict.get('events_tokens') is not None:
        status_dict['category'] = utils.get_category(
            status_dict['events_tokens'])
    else:
        status_dict['category'] = utils.get_category(
            [status_dict['search_term']])

    return {'oembed_html': oembed['html'], 'status': status_dict}
def allreports_to_dict(self):
    """
    Convert allreports object to Python dict
    :return: dict representation of the allreports object
    """
    return conv.document2dict(allreports.lbbase, self)
def test_store_location(self):
    """
    Test location base storage
    """
    location_base = location.LocationBase()
    location_lbbase = location_base.create_base()
    self.assertIsInstance(location_lbbase, Base)

    # Load data
    status_list = self.lbt.search(count=10)
    result = self.lbt.store_twitter(status_list=status_list, tokenize=True)
    self.assertTrue(result)

    status_id_list = self.status_base.get_document_ids()
    log.debug("Number of status found: %s", len(status_id_list))

    status = self.status_base.get_document(status_id_list[0])
    status_dict = conv.document2dict(self.status_base.lbbase, status)

    # Manually add id_doc
    status_dict['_metadata'] = dict()
    status_dict['_metadata']['id_doc'] = status._metadata.id_doc

    # Now try to find location
    status_dict = liblocation.get_location(status_dict)

    # Update base
    self.assertIsNotNone(
        self.status_base.documentrest.update(
            status_dict['_metadata']['id_doc'],
            json.dumps(status_dict)
        )
    )

    result = location_base.remove_base()
    self.assertTrue(result)
def notify_to_dict(self):
    """
    Convert notify object to Python dict
    :return: dict representation of the notify object
    """
    return conv.document2dict(notify_base.lbbase, self)
def desc_to_dict(self):
    """
    Convert desc object to Python dict
    :return: dict representation of the desc object
    """
    return conv.document2dict(desc.lbbase, self)
def process_geo_dict(self, id_doc, max_distance=50000, status_dict=None):
    """
    Get Brasil city distance from document
    :param id_doc: Document id
    :param max_distance: Max distance (meters) to consider
    :param status_dict: Optional pre-loaded status dict
    :return: Dict with Geo information from LBGeo
    """
    if status_dict is None:
        document = self.get_document(id_doc)
        status_dict = conv.document2dict(self.lbbase, document)

    if status_dict.get('location') is None:
        if status_dict.get('arg_structures') is not None:
            # Now try to find location again
            status_dict['_metadata'] = dict()
            status_dict['_metadata']['id_doc'] = id_doc
            status_dict = location.get_location(status_dict)
            if status_dict.get('location') is None:
                log.error("Location not available for document id = %s",
                          id_doc)
                return status_dict
        else:
            log.error("Location not available for document id = %s", id_doc)
            return status_dict

    params = {
        'lat': status_dict['location']['latitude'],
        'lng': status_dict['location']['longitude']
    }

    url = self.geo_url + '/city'
    result = requests.post(url=url, data=json.dumps(params))

    # Check for Exception
    try:
        result.raise_for_status()
    except HTTPError as e:
        log.error("Connection error in id_doc = %s\n%s", id_doc, e.message)
        return status_dict

    try:
        city = result.json()
    except ValueError as e:
        log.error("Error parsing response for id_doc = %s\n%s",
                  id_doc, e.message)
        return status_dict

    # Check for max distance
    if float(city['city_distance']) > float(max_distance):
        # Do not take this distance
        log.debug("Distance = %s bigger than maximum = %s",
                  city['city_distance'], max_distance)
        return status_dict

    # Now update document with city
    status_dict['brasil_city'] = city
    return status_dict
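# Sketch of the LBGeo request that process_geo_dict issues: a POST of
# lat/lng to the /city endpoint. The default base URL is an assumption for
# illustration; only the request/response shape mirrors the method above.
import json
import requests

def example_city_lookup(lat, lng, geo_url="http://localhost/lbgeo"):
    params = {'lat': lat, 'lng': lng}
    result = requests.post(url=geo_url + '/city', data=json.dumps(params))
    result.raise_for_status()
    city = result.json()  # expected to carry a 'city_distance' key
    return city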
def modify_aaData(self, results, base_name):
    """
    Flatten search results into the row list expected under aaData
    """
    model = self.model_base(base_name)
    data = []
    for result in results:
        temp = document2dict(model, result)
        temp['id_doc'] = result._metadata.id_doc
        data.append(temp)
    return data
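# modify_aaData produces the row list a DataTables client consumes under
# the "aaData" key. A hedged sketch of how the return value is typically
# wrapped; the `handler` object and the 'status' base name are assumptions.
def example_aadata_response(handler, results):
    import json
    data = handler.modify_aaData(results, 'status')
    return json.dumps({'aaData': data})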
def process_hashtags(self, id_doc):
    """
    Load the document and extract its hashtags
    :param id_doc: Document to be processed
    :return: result of process_hashtags_dict on the loaded document
    """
    result = self.get_document(id_doc)

    # JSON
    status_dict = conv.document2dict(self.lbbase, result)

    # Manually add id_doc
    status_dict['_metadata'] = dict()
    status_dict['_metadata']['id_doc'] = id_doc

    return self.process_hashtags_dict(status_dict)
def process_tokens(self, id_doc, update=True):
    """
    Process tokens for this id_doc
    :param id_doc: Document to be processed
    :param update: Whether we should update dictionary frequency or not
    :return: True or False
    """
    result = self.get_document(id_doc)

    # JSON
    status_dict = conv.document2dict(self.lbbase, result)

    # Manually add id_doc
    status_dict['_metadata'] = dict()
    status_dict['_metadata']['id_doc'] = id_doc

    # SRL tokenize
    tokenized = srl.srl_tokenize(status_dict['text'])
    if tokenized.get('arg_structures') is not None:
        status_dict['arg_structures'] = tokenized.get('arg_structures')

    if tokenized.get('tokens') is not None:
        status_dict['tokens'] = tokenized.get('tokens')

    # Now try to find location
    status_dict = location.get_location(status_dict)

    # Process tokens if selected
    dictionary_base = dic.DictionaryBase(dic_base=self.dictionary_base)
    result = dictionary.process_tokens_dict(status_dict, dictionary_base,
                                            update=update)
    log.debug("Tokenization corpus computed. id_doc = %s", id_doc)
    status_dict = result['status']

    # Extract hashtags
    status_dict = self.get_hashtags_dict(status_dict)

    # Calculate category
    status_dict = self.get_category(status_dict)

    # Get brasil city information
    status_dict = self.process_geo_dict(id_doc=id_doc,
                                        status_dict=status_dict)

    # Now update document back
    self.documentrest.update(id_doc, json.dumps(status_dict))

    return True
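# Hedged sketch of driving the tokenization pipeline above over a whole
# base: process_tokens fetches each document, runs SRL tokenization,
# location lookup, dictionary frequencies, hashtag extraction, category
# and geo enrichment, then writes the document back. The `status_base`
# argument is an assumption for illustration.
def example_tokenize_all(status_base, update=True):
    for id_doc in status_base.get_document_ids():
        status_base.process_tokens(id_doc, update=update)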
def process_status_categories(self, status_id_doc):
    """
    Process status
    :param status_id_doc: Status id_doc
    :return: Status dict stored
    """
    try:
        result = self.status_base.get_document(status_id_doc)
    except ConnectionError as e:
        log.error("CONNECTION ERROR: Error processing %s\n%s",
                  status_id_doc, e.message)
        # Try again in one second
        time.sleep(1)
        status_dict = self.process_status_categories(status_id_doc)
        return status_dict

    # JSON
    status_dict = conv.document2dict(self.status_base.lbbase, result)

    # Manually add id_doc
    status_dict['_metadata'] = dict()
    status_dict['_metadata']['id_doc'] = status_id_doc

    return status_dict
def blacklist_to_dict(self):
    """
    Convert blacklist object to Python dict
    :return: dict representation of the blacklist
    """
    return conv.document2dict(blacklist_base.lbbase, self)
def crimes_to_dict(self):
    """
    Convert crimes object to Python dict
    :return: dict for crime
    """
    return conv.document2dict(self.crimes_base.lbbase, self)
def analytics_to_dict(self):
    """
    Convert analytics object to Python dict
    :return: dict for analytics
    """
    return conv.document2dict(self.analytics_base.lbbase, self)
def update_status_files(self, lb_intercommunication_obj, key=None,
                        value=None):
    """
    Update the files structure
    """
    add_key = key
    value_attr = value
    for obj in self.get_files_doc(lb_intercommunication_obj):
        dict_document = document2dict(
            LbLibLightBaseSetCs().liblightbase_schemes_df(
                'db_reg_anot_teses_arq'),
            obj)
        list_update_object = []
        for assunt_vinc in dict_document.get('mg_assunt_vinc'):
            if str(assunt_vinc.get(
                    'int_id_doc_assunt')) in self.list_id_reg_teses:
                assunt_vinc[add_key] = value_attr
            list_update_object.append(assunt_vinc)

        add_path = None
        if key == 'bool_ativ':
            # Check whether every item in the list is False
            bool_ativ_file = not [
                mg_assunt.get('bool_ativ', True)
                for mg_assunt in list_update_object
            ].count(False) == len(list_update_object)
            add_path = {
                "path": "bool_ativ_file",
                "mode": "update",
                "fn": None,
                "args": [bool_ativ_file]
            }
        elif key == 'bool_del':
            # The file counts as deleted only when every item is flagged
            bool_del_file = [
                mg_assunt.get('bool_del', False)
                for mg_assunt in list_update_object
            ].count(True) == len(list_update_object)
            add_path = {
                "path": "bool_del_file",
                "mode": "update",
                "fn": None,
                "args": [bool_del_file]
            }

        operation_lbdoc = {
            'lb_ctrl_op': 'update_collection',
            'lb_ctrl_db': 'db_reg_anot_teses_arq',
            'search': {
                'literal': "str_file_hash = '" + obj.str_file_hash + "'",
                'limit': 1
            },
            'path_operation': [{
                "path": "mg_assunt_vinc",
                "mode": "update",
                "fn": None,
                "args": [list_update_object]
            }],
            'lb_ctrl_cookie':
                lb_intercommunication_obj.user_credentials["lb_cookie"]
        }
        if add_path is not None:
            operation_lbdoc['path_operation'].append(add_path)
        submit_operations_df_return = \
            LBDOC_OBJ.lbdoc_obj.submit_operations_df(
                None, operation_lbdoc, False)
        if submit_operations_df_return.lbdoc_return_objs.\
                lbdoc_return_objs[-1].status == "success":
            continue
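# Hedged sketch of the update_collection payload shape built above: one
# path_operation entry rewrites mg_assunt_vinc, a second updates the
# aggregate file flag. Field names come from the method; the hash value
# here is a placeholder assumption.
def example_update_payload(list_update_object, bool_del_file, cookie):
    return {
        'lb_ctrl_op': 'update_collection',
        'lb_ctrl_db': 'db_reg_anot_teses_arq',
        'search': {'literal': "str_file_hash = 'abc123'", 'limit': 1},
        'path_operation': [
            {'path': 'mg_assunt_vinc', 'mode': 'update', 'fn': None,
             'args': [list_update_object]},
            {'path': 'bool_del_file', 'mode': 'update', 'fn': None,
             'args': [bool_del_file]},
        ],
        'lb_ctrl_cookie': cookie,
    }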
def test_x(self):
    Pessoa = self.base.metaclass()
    Gmulti = self.base.metaclass('gmulti')
    Dependente = self.base.metaclass('dependente')
    lbbase = self.base

    class Y(Gmulti):
        @property
        def teste(self):
            return Gmulti.teste.__get__(self)

        @teste.setter
        def teste(self, v):
            Gmulti.teste.__set__(self, v)

    class X(Pessoa):
        def __init__(self, **args):
            super(X, self).__init__(**args)

        @property
        def nome(self):
            return Pessoa.nome.__get__(self)

        @nome.setter
        def nome(self, v):
            Pessoa.nome.__set__(self, v)

        @property
        def dependente(self):
            return Pessoa.dependente.__get__(self)

        @dependente.setter
        def dependente(self, v):
            Pessoa.dependente.__set__(self, v)

        def set_dependentes(self):
            g1 = dict(teste='ww',
                      teste2=['dgfkdsgsghslkdghsk', 'dsgjsd.,gjsd.gjs'])
            g1_obj = dict2document(lbbase, g1, Gmulti)
            g2 = dict(teste='ww',
                      teste2=['dgfkdsgsghslkdghsk', 'dsgjsd.,gjsd.gjs'])
            g2_obj = dict2document(lbbase, g2, Gmulti)
            g3 = dict(teste='ww',
                      teste2=['dgfkdsgsghslkdghsk', 'dsgjsd.,gjsd.gjs'])
            g3_obj = dict2document(lbbase, g3, Gmulti)
            g4 = dict(teste='ww',
                      teste2=['dgfkdsgsghslkdghsk', 'dsgjsd.,gjsd.gjs'])
            g4_obj = dict2document(lbbase, g4, Gmulti)
            d1 = dict(nome_dep='xxx', gmulti=[g1, g2])
            d1_obj = dict2document(lbbase, d1, Dependente)
            d2 = dict(nome_dep='xxx', gmulti=[g3, g4])
            d2_obj = dict2document(lbbase, d2, Dependente)
            d1 = [d1_obj, d2_obj]
            self.dependente = d1

    x = X(nome='aa', carros=['d'])
    x.set_dependentes()
    j = document2json(self.base, x, indent=4)
    self.assertIsNotNone(j)
    fd = open('/tmp/document2.json', 'w+')
    fd.write(j)
    fd.close()
    p = document2dict(self.base, x)
    y = X(**p)
    self.assertIsInstance(y, X)
def store_twitter(self, status_list, tokenize=True):
    """
    Store twitter status in LB Database
    :param status_list: List of status to be stored
    :param tokenize: Whether we should tokenize it directly or not
    :return: id_doc of the last stored status, or None when it
        isn't possible to store it
    """
    for elm in status_list:
        status_json = self.status_to_json([elm])
        status = Status(
            origin='twitter',
            inclusion_date=datetime.datetime.now(),
            inclusion_datetime=datetime.datetime.now(),
            search_term=self.term,
            text=elm.text,
            source=status_json,
            base=self.status_base
        )

        retorno = status.create_status()
        if retorno is None:
            log.error("Error inserting status %s on Base", elm.text)
            continue

        status_dict = conv.document2dict(self.status_base.lbbase, status)

        # Manually add id_doc
        status_dict['_metadata'] = dict()
        status_dict['_metadata']['id_doc'] = retorno

        if tokenize:
            # SRL tokenize
            if status_dict.get('text') is not None:
                tokenized = srl.srl_tokenize(status_dict['text'])
                if tokenized.get('arg_structures') is not None:
                    status_dict['arg_structures'] = tokenized.get(
                        'arg_structures')

                if tokenized.get('tokens') is not None:
                    status_dict['tokens'] = tokenized.get('tokens')

            # Now try to find location
            status_dict = location.get_location(status_dict)

            # Process tokens if selected
            result = dictionary.process_tokens_dict(status_dict,
                                                    self.dictionary_base)
            log.info("Tokenization corpus computed. id_doc = %s", retorno)
            status_dict = result['status']

            # Extract hashtags
            status_dict = self.status_base.get_hashtags_dict(status_dict)

            # Calculate category
            status_dict = status.get_category(status_dict)

            # Get brasil city information
            status_dict = self.status_base.process_geo_dict(
                id_doc=retorno,
                status_dict=status_dict
            )

            # Now update document back
            self.status_base.documentrest.update(retorno,
                                                 json.dumps(status_dict))

    return retorno
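# Hedged usage sketch for store_twitter: fetch a batch of status with the
# collector's search method and store them with tokenization enabled,
# mirroring the pattern in test_store_location above. The `collector`
# object (an instance of the class defining search and store_twitter)
# is an assumption for illustration.
def example_collect_and_store(collector, count=10):
    status_list = collector.search(count=count)
    return collector.store_twitter(status_list=status_list, tokenize=True)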
def insert_from_status(lbstatus, dictionary_base=None, outfile=None):
    task_queue = Queue()
    done_queue = Queue()
    processes = int(lbstatus.processes)

    # As we are reprocessing tokens, it is necessary to clear frequency
    dic_base = dictionary.DictionaryBase(dic_base=dictionary_base)
    dic_base.remove_base()
    dic_base.create_base()

    # Just send the GET request
    rest_url = lbstatus.documentrest.rest_url
    rest_url += "/" + lbstatus.lbbase._metadata.name + "/doc/"

    id_status_list = lbstatus.get_document_ids()
    if id_status_list is None:
        log.error("No status found. Import some status first")
        return False

    for elm in id_status_list:
        params = dict(
            status_id=elm,
            outfile=outfile,
            rest_url=rest_url + elm
        )
        task_queue.put(params)

    for i in range(processes):
        # Allow parallel processing of the tokens
        Process(target=worker, args=(task_queue, done_queue)).start()

    # Load dictionary
    dic = corpora.Dictionary()
    if outfile is not None:
        if os.path.exists(outfile):
            dic.load(outfile)

    max_size = lbstatus.max_size

    # Merge results with this dictionary
    log.debug("Processing results from dictionary creation")
    for i in range(len(id_status_list)):
        # Update status after processing
        processed = done_queue.get()
        dic2 = processed['dic']
        result = processed['status']
        status_dict = conv.document2dict(lbstatus.lbbase, result)
        # Use the id of the document actually processed instead of the
        # leftover loop variable from the task submission above
        status_id = result._metadata.id_doc
        try:
            retorno = lbstatus.documentrest.update(status_id,
                                                   json.dumps(status_dict))
        except HTTPError as e:
            log.error("Error updating document id = %s\n%s",
                      status_id, e.message)

        dic.merge_with(dic2)
        log.debug("Merged dictionary: %s", dic)
        if outfile is not None:
            # Serialize if it grows bigger than the maximum size
            if sys.getsizeof(dic, 0) >= max_size:
                log.info("Serializing dict as it reached max size %s",
                         max_size)
                dic.save(outfile)

    if outfile is not None:
        dic.save(outfile)

    # Tell child processes to stop
    for i in range(processes):
        task_queue.put('STOP')

    return True
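# The worker() referenced above is not shown in this module. A minimal,
# runnable sketch of the STOP-sentinel pattern it has to follow; the
# per-task processing here is a placeholder assumption, real code would
# fetch params['rest_url'] and tokenize the status before reporting back.
from multiprocessing import Process, Queue

def example_worker(task_queue, done_queue):
    # Consume tasks until the 'STOP' sentinel arrives on the queue
    for params in iter(task_queue.get, 'STOP'):
        # Echo the task id to show the queue protocol
        done_queue.put({'status_id': params['status_id']})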