class VisualizationElasticsearchDAO(VisualizationSearchDAO):
    """Class to manage access to visualizations' Elasticsearch documents."""

    def __init__(self, visualization_revision):
        self.visualization_revision = visualization_revision
        self.search_index = ElasticsearchIndex()

    def add(self):
        return self.search_index.indexit(self._build_document())

    def remove(self):
        self.search_index.delete_documents([{"type": self._get_type(),
                                             "docid": self._get_id()}])
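
# Minimal usage sketch, assuming `revision` is a published
# VisualizationRevision instance and that _build_document(), _get_type()
# and _get_id() are provided by the VisualizationSearchDAO base class:
dao = VisualizationElasticsearchDAO(revision)
dao.add()     # index (or reindex) the revision's document
dao.remove()  # delete the document, e.g. on unpublish
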
class DatasetElasticsearchDAO(DatasetSearchIndexDAO):
    """Class to manage access to datasets' Elasticsearch documents."""

    def __init__(self, dataset_revision):
        self.logger = logging.getLogger(__name__)
        self.dataset_revision = dataset_revision
        self.search_index = ElasticsearchIndex()

    def add(self):
        return self.search_index.indexit(self._build_document())

    def remove(self):
        self.search_index.delete_documents([{"type": self._get_type(),
                                             "docid": self._get_id()}])
class DatastreamElasticsearchDAO(DatastreamSearchDAO):
    """Class to manage access to datastreams' Elasticsearch documents."""

    def __init__(self, datastream_revision):
        self.datastream_revision = datastream_revision
        self.search_index = ElasticsearchIndex()

    def add(self):
        output = self.search_index.indexit(self._build_document())
        return (self.datastream_revision.id,
                self.datastream_revision.datastream.id,
                output)

    def remove(self):
        return self.search_index.delete_documents([{"type": self._get_type(),
                                                    "docid": self._get_id()}])
class DatastreamElasticsearchDAO(DatastreamSearchDAO):
    """Class to manage access to datastreams' Elasticsearch documents."""

    def __init__(self, revision):
        self.revision = revision
        self.search_index = ElasticsearchIndex()

    def add(self):
        output = self.search_index.indexit(self._build_document())
        return (self.revision.id, self.revision.datastream.id, output)

    def remove(self):
        return self.search_index.delete_documents([{
            "type": self._get_type(),
            "docid": self._get_id()
        }])
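
# Usage sketch: add() returns a (revision_id, datastream_id, es_response)
# tuple so callers can log exactly what was indexed. `revision` is assumed
# to be a published DataStreamRevision instance:
rev_id, ds_id, es_response = DatastreamElasticsearchDAO(revision).add()
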
def update_timestamp(self, response, resource):
    doubts = [
        'is_file' in resource and resource['is_file'],
        'collect_type' in resource and resource['collect_type'] == choices.CollectTypeChoices.URL,
        self.dao_pk == 'datastream_revision_id'
    ]
    if all(doubts):
        if isinstance(response, dict) and "fTimestamp" in response:
            timestamp = response['fTimestamp']
        else:
            timestamp = int(round(time.time() * 1000))
        try:
            es = ElasticsearchIndex()
            doc_id = es.search(
                doc_type="ds",
                query={"query": {"match": {"revision_id": resource['revision_id']}}},
                fields="_id"
            )['hits']['hits'][0]['_id']
            es.update({'doc': {'fields': {'timestamp': timestamp}},
                       'docid': doc_id,
                       'type': "ds"})
        except IndexError:
            pass
        except Exception as e:
            logger.warning('[ENGINE COMMAND] unknown error %s' % str(e))
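
# Sketch of a `resource` dict that would pass the `doubts` gate above, with
# made-up values; note the third condition also requires the DAO to be a
# datastream revision DAO (self.dao_pk == 'datastream_revision_id'):
resource = {
    'is_file': True,
    'collect_type': choices.CollectTypeChoices.URL,
    'revision_id': 1234,  # hypothetical revision id
}
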
def __init__(self):
    logger.info('New %sIndex INIT' % settings.USE_SEARCHINDEX)
    if settings.USE_SEARCHINDEX == 'searchify':
        self.index = SearchifyIndex()
    elif settings.USE_SEARCHINDEX == 'elasticsearch':
        self.index = ElasticsearchIndex()
    # elif settings.USE_SEARCHINDEX == 'test':
    #     self.search_dao = DatastreamSearchDAO(datastream_revision)
    else:
        raise SearchIndexNotFoundException()
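
# The backend is selected from Django settings; a sketch of the relevant
# settings.py entry (the code above accepts 'searchify' or 'elasticsearch';
# any other value raises SearchIndexNotFoundException):
USE_SEARCHINDEX = 'elasticsearch'
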
def _request(self, query):
    url = self._get_url()
    response = None

    params = urllib.urlencode(query)
    self.logger.info("URL: %s Params: %s query: %s method: %s"
                     % (url, params, query, self.method))
    try:
        if self.method == 'GET':
            response = urllib.urlopen(url + '?' + params)
        elif self.method == 'POST':
            response = urllib.urlopen(url, params)
    except Exception as e:
        self.logger.error('Error trying to access %s | %s (%s)'
                          % (url, str(params), str(e)))
        raise

    if response:
        if response.getcode() == 200:
            ret = response.read()

            if len(response.info().getplist()) > 0:
                mimetype = '{0}; {1}'.format(response.info().gettype(),
                                             response.info().getplist()[0])
            else:
                mimetype = 'application/json'

            # only if the response is JSON
            if mimetype.split(";")[0] == 'application/json':
                try:
                    # parse the JSON to extract the fTimestamp
                    aux = json.loads(ret)
                    if isinstance(aux, dict) and "fTimestamp" in aux:
                        pids = filter(None, map(lambda x: x[0] == 'pId' and x[1], query))
                        if len(pids) > 0:
                            pId = pids[0]
                            if settings.DEBUG:
                                self.logger.info(
                                    '[ENGINE COMMAND] Saving the fTimestamp %s (pId: %s)'
                                    % (aux["fTimestamp"], pId))
                            try:
                                es = ElasticsearchIndex()
                                doc_id = es.search(
                                    doc_type="ds",
                                    query={"query": {"match": {"revision_id": pId}}},
                                    fields="_id"
                                )['hits']['hits'][0]['_id']
                                es.update({
                                    'doc': {'fields': {'timestamp': aux['fTimestamp']}},
                                    'docid': doc_id,
                                    'type': "ds"
                                })
                            except IndexError:
                                self.logger.warning(
                                    '[ENGINE COMMAND] revision id %s is not in the index; '
                                    'it is probably not published' % pId)
                except ValueError:
                    self.logger.error('[ENGINE COMMAND] ret is not valid JSON')

            return ret, mimetype

    raise IOError('Error code %d at %s+%s' % (response.getcode(), url, str(params)))
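
# Sketch of the `query` argument _request() expects: a sequence of
# (key, value) pairs, since it is both urlencode()d and scanned for a 'pId'
# entry; 'pLimit' and 'pOffset' are hypothetical parameter names:
query = [('pId', 1234), ('pLimit', 50), ('pOffset', 0)]
params = urllib.urlencode(query)  # 'pId=1234&pLimit=50&pOffset=0'
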
class DatastreamHitsDAO():
    """Class to manage access to hits in the DB and in the search index."""

    doc_type = "ds"
    from_cache = False

    # cache TTL, 1 hour
    TTL = 3600

    CHANNEL_TYPE = ("web", "api")

    def __init__(self, datastream):
        self.datastream = datastream
        if isinstance(self.datastream, dict):
            self.datastream_id = self.datastream['datastream_id']
        else:
            self.datastream_id = self.datastream.id
        #self.datastream_revision = datastream.last_published_revision
        self.search_index = ElasticsearchIndex()
        self.cache = Cache()

    def add(self, channel_type):
        """Add a hit to the datastream."""
        # TODO: temporary fix for the DT-to-DAO migration. The problem is that
        # the datastream sometimes comes from a queryset and other times from
        # a DAO, and they are different objects.
        try:
            datastream_id = self.datastream.datastream_id
        except:
            datastream_id = self.datastream['datastream_id']

        try:
            guid = self.datastream.guid
        except:
            guid = self.datastream['guid']

        try:
            hit = DataStreamHits.objects.create(datastream_id=datastream_id,
                                                channel_type=channel_type)
        except IntegrityError:
            # is this the right exception?
            raise DataStreamNotFoundException()

        logger.info("DatastreamHitsDAO hit! (guid: %s)" % guid)

        # build the document used to update the index
        doc = {'docid': "DS::%s" % guid,
               "type": "ds",
               "script": "ctx._source.fields.%s_hits+=1" % self.CHANNEL_TYPE[channel_type]}

        return self.search_index.update(doc)

    def count(self, channel_type=ChannelTypes.WEB):
        return DataStreamHits.objects.filter(datastream_id=self.datastream_id,
                                             channel_type=channel_type).count()

    def count_by_days(self, day=30, channel_type=None):
        """Return the per-day hit counts for the last `day` days, from `day`
        days ago through today."""
        # not sure this check is necessary
        if day < 1:
            return {}

        # start date
        start_date = datetime.today() - timedelta(days=day)

        # keep only the date part
        truncate_date = connection.ops.date_trunc_sql('day', 'created_at')

        qs = DataStreamHits.objects.filter(datastream_id=self.datastream_id,
                                           created_at__gte=start_date)

        if channel_type:
            qs = qs.filter(channel_type=channel_type)

        hits = qs.extra(select={
            '_date': truncate_date,
            "fecha": 'DATE(created_at)'
        }).values("fecha").order_by("created_at").annotate(hits=Count("created_at"))

        control = [date.today() - timedelta(days=x) for x in range(day - 1, 0, -1)]
        control.append(date.today())

        for i in hits:
            try:
                control.remove(i['fecha'])
            except ValueError:
                pass

        hits = list(hits)
        for i in control:
            hits.append({"fecha": i, "hits": 0})

        hits = sorted(hits, key=lambda k: k['fecha'])

        # convert the dates to ISO format
        hits = map(self._date_isoformat, hits)

        return hits

    def _date_isoformat(self, row):
        row['fecha'] = row['fecha'].isoformat()
        return row
def __init__(self, datastream):
    self.datastream = datastream
    #self.datastream_revision = datastream.last_published_revision
    self.search_index = ElasticsearchIndex()
    self.logger = logging.getLogger(__name__)
    self.cache = Cache()
class DatastreamHitsDAO():
    """Class to manage access to hits in the DB and in the search index."""

    doc_type = "ds"
    from_cache = False

    # cache TTL, 1 hour
    TTL = 3600

    CHANNEL_TYPE = ("web", "api")

    def __init__(self, datastream):
        self.datastream = datastream
        if isinstance(self.datastream, dict):
            self.datastream_id = self.datastream['datastream_id']
        else:
            self.datastream_id = self.datastream.id
        #self.datastream_revision = datastream.last_published_revision
        self.search_index = ElasticsearchIndex()
        self.cache = Cache()

    def add(self, channel_type):
        """Add a hit to the datastream."""
        # TODO: temporary fix for the DT-to-DAO migration. The problem is that
        # the datastream sometimes comes from a queryset and other times from
        # a DAO, and they are different objects.
        try:
            datastream_id = self.datastream.datastream_id
        except:
            datastream_id = self.datastream['datastream_id']

        try:
            guid = self.datastream.guid
        except:
            guid = self.datastream['guid']

        try:
            hit = DataStreamHits.objects.create(datastream_id=datastream_id,
                                                channel_type=channel_type)
        except IntegrityError:
            # is this the right exception?
            raise DataStreamNotFoundException()

        # build the document used to update the index
        doc = {
            'docid': "DS::%s" % guid,
            "type": "ds",
            "script": "ctx._source.fields.%s_hits+=1" % self.CHANNEL_TYPE[channel_type]
        }

        return self.search_index.update(doc)

    def count(self, channel_type=ChannelTypes.WEB):
        return DataStreamHits.objects.filter(
            datastream_id=self.datastream_id,
            channel_type=channel_type).count()

    def count_by_days(self, day=30, channel_type=None):
        """Return the per-day hit counts for the last `day` days, from `day`
        days ago through today."""
        # not sure this check is necessary
        if day < 1:
            return {}

        # start date
        start_date = now() - timedelta(days=day)

        # keep only the date part
        truncate_date = connection.ops.date_trunc_sql('day', 'created_at')

        qs = DataStreamHits.objects.filter(datastream_id=self.datastream_id,
                                           created_at__gte=start_date)

        if channel_type:
            qs = qs.filter(channel_type=channel_type)

        hits = qs.extra(select={
            '_date': truncate_date,
            "fecha": 'DATE(created_at)'
        }).values("fecha").order_by("created_at").annotate(hits=Count("created_at"))

        control = [now().date() - timedelta(days=x) for x in range(day - 1, 0, -1)]
        control.append(now().date())

        for i in hits:
            try:
                control.remove(i['fecha'])
            except ValueError:
                pass

        hits = list(hits)
        for i in control:
            hits.append({"fecha": i, "hits": 0})

        hits = sorted(hits, key=lambda k: k['fecha'])

        # convert the dates to ISO format
        hits = map(self._date_isoformat, hits)

        return hits

    def _date_isoformat(self, row):
        row['fecha'] = row['fecha'].isoformat()
        return row
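
# Minimal usage sketch, assuming `datastream` is a DataStream model instance
# and that channel 0 is "web" and channel 1 is "api", per CHANNEL_TYPE:
dao = DatastreamHitsDAO(datastream)
dao.add(channel_type=0)               # store a web hit and bump web_hits in the index
web_hits = dao.count(channel_type=0)  # total web hits from the DB
per_day = dao.count_by_days(day=30)   # [{'fecha': 'YYYY-MM-DD', 'hits': n}, ...]
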
class VisualizationHitsDAO:
    """Class to manage access to hits in the DB and in the search index."""

    doc_type = "vz"
    from_cache = False

    # cache TTL, 1 hour
    TTL = 3600

    def __init__(self, visualization):
        self.visualization = visualization
        self.search_index = ElasticsearchIndex()
        self.logger = logging.getLogger(__name__)
        self.cache = Cache()

    def add(self, channel_type):
        """Add a hit to the visualization."""
        try:
            hit = VisualizationHits.objects.create(
                visualization_id=self.visualization.visualization_id,
                channel_type=channel_type
            )
        except IntegrityError:
            # is this the right exception?
            raise VisualizationNotFoundException()

        self.logger.info("VisualizationHitsDAO hit! (guid: %s)"
                         % self.visualization.guid)

        # build the document used to update the index
        doc = {
            "docid": "%s::%s" % (self.doc_type.upper(), self.visualization.guid),
            "type": self.doc_type,
            "script": "ctx._source.fields.hits+=1",
        }

        return self.search_index.update(doc)

    def count(self):
        return VisualizationHits.objects.filter(
            visualization_id=self.visualization.visualization_id).count()

    def _get_cache(self, cache_key):
        cache = self.cache.get(cache_key)
        return cache

    def _set_cache(self, cache_key, value):
        return self.cache.set(cache_key, value, self.TTL)

    def count_by_day(self, day):
        """Return the hits of a given day."""
        # if it is a datetime, keep only the date part
        if type(day) == type(datetime.today()):
            day = day.date()

        cache_key = "%s_hits_%s_by_date_%s" % (self.doc_type,
                                               self.visualization.guid,
                                               str(day))

        hits = self._get_cache(cache_key)

        # if that day is not cached yet, cache it; when the requested day is
        # today, refresh the cache but always use the value from the DB
        if not hits or day == date.today():
            hits = VisualizationHits.objects.filter(
                visualization=self.visualization,
                created_at__startswith=day
            ).count()
            self._set_cache(cache_key, hits)

        return day, hits

    def count_by_days(self, day=30, channel_type=None):
        """Return the per-day hit counts for the last `day` days, from `day`
        days ago through today."""
        # not sure this check is necessary
        if day < 1:
            return {}

        cache_key = "%s_hits_%s_%s" % (self.doc_type, self.visualization.guid, day)

        if channel_type:
            cache_key += "_channel_type_%s" % channel_type

        hits = self._get_cache(cache_key)

        # darn! it's not in the cache
        if not hits:
            # start date
            start_date = datetime.today() - timedelta(days=day)

            # keep only the date part
            truncate_date = connection.ops.date_trunc_sql("day", "created_at")

            qs = VisualizationHits.objects.filter(visualization=self.visualization,
                                                  created_at__gte=start_date)

            if channel_type:
                qs = qs.filter(channel_type=channel_type)

            hits = (
                qs.extra(select={"_date": truncate_date, "fecha": "DATE(created_at)"})
                .values("fecha")
                .order_by("created_at")
                .annotate(hits=Count("created_at"))
            )

            control = [date.today() - timedelta(days=x) for x in range(day - 1, 0, -1)]
            control.append(date.today())

            for i in hits:
                try:
                    control.remove(i["fecha"])
                except ValueError:
                    pass

            hits = list(hits)
            for i in control:
                hits.append({"fecha": i, "hits": 0})

            hits = sorted(hits, key=lambda k: k["fecha"])

            # convert the dates to ISO format
            hits = map(self._date_isoformat, hits)

            # kindly leave it in the cache!
            self._set_cache(cache_key, json.dumps(hits, cls=DjangoJSONEncoder))
            self.from_cache = False
        else:
            hits = json.loads(hits)
            self.from_cache = True

        return hits

    def _date_isoformat(self, row):
        row["fecha"] = row["fecha"].isoformat()
        return row
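
# Sketch of the cache round-trip implemented in count_by_days() above,
# assuming `viz` is a Visualization model instance and the Cache backend
# stores strings (hence the json.dumps/json.loads pair):
dao = VisualizationHitsDAO(viz)
hits = dao.count_by_days(day=7)  # first call queries the DB; dao.from_cache == False
hits = dao.count_by_days(day=7)  # second call within the 1 h TTL; dao.from_cache == True
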
class VisualizationHitsDAO():
    """Class to manage access to hits in the DB and in the search index."""

    doc_type = "vz"
    from_cache = False

    # cache TTL, 1 hour
    TTL = 3600

    CHANNEL_TYPE = ("web", "api")

    def __init__(self, visualization):
        self.visualization = visualization
        if isinstance(self.visualization, dict):
            self.visualization_id = self.visualization['visualization_id']
        else:
            self.visualization_id = self.visualization.visualization_id
        self.search_index = ElasticsearchIndex()
        self.logger = logging.getLogger(__name__)
        self.cache = Cache()

    def add(self, channel_type):
        """Add a hit to the visualization.

        :param channel_type:
        """
        # TODO: temporary fix for the DT-to-DAO migration. The problem is that
        # the visualization sometimes comes from a queryset and other times
        # from a DAO, and they are different objects.
        try:
            guid = self.visualization.guid
        except:
            guid = self.visualization['guid']

        try:
            hit = VisualizationHits.objects.create(
                visualization_id=self.visualization_id,
                channel_type=channel_type)
        except IntegrityError:
            # is this the right exception?
            raise VisualizationNotFoundException()

        self.logger.info("VisualizationHitsDAO hit! (id: %s)" % self.visualization_id)

        # build the document used to update the index
        doc = {
            'docid': "%s::%s" % (self.doc_type.upper(), guid),
            "type": self.doc_type,
            "script": "ctx._source.fields.hits+=1",
        }
        self.search_index.update(doc)

        # now increment the counter of the specific channel
        doc['script'] = "ctx._source.fields.%s_hits+=1" % self.CHANNEL_TYPE[channel_type]
        return self.search_index.update(doc)

    def count(self, channel_type=None):
        """Return how many hits there are, per channel or overall.

        :param channel_type: filter by channel
        :return: int
        """
        query = VisualizationHits.objects.filter(visualization__id=self.visualization_id)
        if channel_type in (0, 1):
            query = query.filter(channel_type=channel_type)
        return query.count()

    def count_by_day(self, day):
        """Return the hits of a given day.

        :param day:
        """
        # if it is a datetime, keep only the date part
        if type(day) == type(datetime.today()):
            day = day.date()

        cache_key = "%s_hits_%s_by_date_%s" % (self.doc_type,
                                               self.visualization.guid,
                                               str(day))

        hits = self._get_cache(cache_key)

        # if that day is not cached yet, cache it; when the requested day is
        # today, refresh the cache but always use the value from the DB
        if not hits or day == date.today():
            hits = VisualizationHits.objects.filter(
                visualization=self.visualization,
                created_at__startswith=day).count()
            self._set_cache(cache_key, hits)

        return day, hits

    def count_by_days(self, day=30, channel_type=None):
        """Return the per-day hit counts for the last `day` days, from `day`
        days ago through today.

        :param channel_type:
        :param day:
        """
        # not sure this check is necessary
        if day < 1:
            return {}

        # start date
        start_date = datetime.today() - timedelta(days=day)

        # keep only the date part
        truncate_date = connection.ops.date_trunc_sql('day', 'created_at')

        qs = VisualizationHits.objects.filter(visualization_id=self.visualization_id,
                                              created_at__gte=start_date)

        if channel_type:
            qs = qs.filter(channel_type=channel_type)

        hits = qs.extra(select={
            '_date': truncate_date,
            "fecha": 'DATE(created_at)'
        }).values("fecha").order_by("created_at").annotate(hits=Count("created_at"))

        control = [date.today() - timedelta(days=x) for x in range(day - 1, 0, -1)]
        control.append(date.today())

        for i in hits:
            try:
                control.remove(i['fecha'])
            except ValueError:
                pass

        hits = list(hits)
        for i in control:
            hits.append({"fecha": i, "hits": 0})

        hits = sorted(hits, key=lambda k: k['fecha'])

        # convert the dates to ISO format
        hits = map(self._date_isoformat, hits)

        return hits

    def _date_isoformat(self, row):
        row['fecha'] = row['fecha'].isoformat()
        return row
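
# Minimal usage sketch, assuming `visualization` is either a Visualization
# model instance or a dict with 'visualization_id' and 'guid' keys (the two
# shapes the constructor and add() accept):
dao = VisualizationHitsDAO(visualization)
dao.add(channel_type=1)               # one API hit: bumps both hits and api_hits
total = dao.count()                   # all channels
api_hits = dao.count(channel_type=1)
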
class Command(BaseCommand): help = "Index datasets." option_list = BaseCommand.option_list + ( make_option('--all', action='store_true', dest='all', default=False, help='Reindex resources'), make_option('--flush', action='store_true', dest='flush', default=False, help='flush index'), make_option('--only-datasets', action='store_true', dest='datasets', default=False, help='reindex datasets'), make_option('--only-datastreams', action='store_true', dest='datastreams', default=False, help='reindex datastreams'), make_option('--only-visualizations', action='store_true', dest='visualizations', default=False, help='reindex visualization'), make_option('--only-dashboards', action='store_true', dest='dashboards', default=False, help='reindex dashboards'), make_option('--debug', action='store_true', dest='debug', default=False, help='debug'), ) def handle(self, *args, **options): if not options['all'] and not options['datasets'] and not options['datastreams'] and not options['visualizations'] and not options['dashboards']: print "\nUse: " print "\n\treindex --<all|datasets|datastreams|visualizations|dashboards> [--flush] [--debug]\n\n" print "\t--all\t\t\treindex all resourses" print "\t--only-datasets\t\treindex datasets resourses" print "\t--only-datastreams\t\treindex datastreams resourses" print "\t--only-visualizations\treindex visualizations resourses" print "\t--only-dashboards\t\treindex dashboards resourses" print "\t--flush\t\t\tflush index" print "\t--debug\t\t\tdebug|verbose" print "\n" return if options['debug']: print "[Otions]" for i in options.keys(): print "\t",i.ljust(15),": ",options[i] if options['flush']: # destruye el index ElasticsearchIndex().flush_index() # conectamos con elastic self.es = ElasticsearchIndex() # index resources if options['all']: options['datasets']=True options['datastreams']=True options['visualizations']=True options['dashboards']=True self.options=options self.index_datasets() self.index_datastreams() self.index_visualizations() self.index_dashboards() def index_datasets(self): if self.options['datasets']: if self.options['debug']: print "[Iniciando datasets]" for dataset in Dataset.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED): try: datasetrevision=dataset.last_published_revision search_dao = DatasetSearchDAOFactory().create(datasetrevision) search_dao.add() except: print "[ERROR dt] Fallo al indexar Dataset. 
ID: {} Account: {}".format(dataset.id, dataset.user.account.name) def index_visualizations(self): if self.options['visualizations']: if self.options['debug']: print "[Iniciando visualizations]" for vz in Visualization.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED): vz_revision=vz.last_published_revision search_dao = VisualizationSearchDAOFactory().create(vz_revision) try: search_dao.add() except VisualizationI18n.MultipleObjectsReturned: print "[ERROR vz] VisualizationI18n.MultipleObjectsReturned (vz.id= %s)" % vz.id continue except AttributeError: print "[ERROR vz] self.visualization_revision.visualization.datastream.last_published_revision == None (vz.id= %s, ds= %s)" % (vz.id, vz.datastream.id) continue except: print "[ERROR vz] Probablemente VisualizationI18n.MultipleObjectsReturned (vz.id= %s)" % vz.id continue h = VisualizationHitsDAO(vz_revision) doc={ 'docid': "VZ::%s" % vz.guid, "type": "vz", "doc": { "fields": { "hits": h.count(), "web_hits": h.count(channel_type=0), "api_hits": h.count(channel_type=1) } } } try: self.es.update(doc) except: if self.options['debug']: print "[ERROR]: No se pudo ejecutar: ",doc def index_datastreams(self): if self.options['datastreams']: if self.options['debug']: print "[Iniciando datastreams]" for datastream in DataStream.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED): try: datastreamrevision=datastream.last_published_revision datastream_rev = DataStreamDBDAO().get(datastreamrevision.user, datastream_revision_id=datastreamrevision.id, published=True ) search_dao = DatastreamSearchDAOFactory().create(datastreamrevision) try: search_dao.add() except DatastreamI18n.MultipleObjectsReturned: print "[ERROR ds] DatastreamI18n.MultipleObjectsReturned (ds.id= %s)" % datastream.id continue except AttributeError: print "[ERROR ds] self.datastream.last_published_revision == None (ds= %s)" % datastream.id continue h = DatastreamHitsDAO(datastream_rev) doc={ 'docid': "DS::%s" % datastreamrevision.datastream.guid, "type": "ds", "doc": { "fields": { "hits": h.count(), "web_hits": h.count(channel_type=0), "api_hits": h.count(channel_type=1) } } } try: self.es.update(doc) except: if self.options['debug']: print "[ERROR]: No se pudo ejecutar: ",doc except: print "[ERROR ds] Fallo al indexar ID {} de la cuenta: {}".format(datastream.id, datastream.user.account.name) def index_dashboards(self): if self.options['dashboards']: if self.options['debug']: print "[Iniciando dashboards]" for plugin in DatalPluginPoint.get_active_with_att('reindex'): plugin.reindex(self.es)
def handle(self, *args, **options):
    # index resources
    if options['reindex']:
        # destroy the index
        ElasticsearchIndex().flush_index()
        es = ElasticsearchIndex()

        for dataset in Dataset.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
            datasetrevision = dataset.last_published_revision
            search_dao = DatasetSearchDAOFactory().create(datasetrevision)
            search_dao.add()

        for vz in Visualization.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
            vz_revision = vz.last_published_revision
            search_dao = VisualizationSearchDAOFactory().create(vz_revision)
            search_dao.add()

            h = VisualizationHitsDAO(vz_revision)
            doc = {
                'docid': "VZ::%s" % vz.guid,
                "type": "vz",
                "doc": {
                    "fields": {
                        "hits": h.count(),
                        "web_hits": h.count(channel_type=0),
                        "api_hits": h.count(channel_type=1)
                    }
                }
            }
            try:
                es.update(doc)
            except:
                pass

        # TODO: the DAO's query method should be used here
        for datastream in DataStream.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
            datastreamrevision = datastream.last_published_revision
            datastream_rev = DataStreamDBDAO().get(
                datastreamrevision.user.language,
                datastream_revision_id=datastreamrevision.id,
                published=True
            )
            search_dao = DatastreamSearchDAOFactory().create(datastreamrevision)
            search_dao.add()

            h = DatastreamHitsDAO(datastream_rev)
            doc = {
                'docid': "DS::%s" % datastreamrevision.datastream.guid,
                "type": "ds",
                "doc": {
                    "fields": {
                        "hits": h.count(),
                        "web_hits": h.count(channel_type=0),
                        "api_hits": h.count(channel_type=1)
                    }
                }
            }
            try:
                es.update(doc)
            except:
                pass

        for plugin in DatalPluginPoint.get_active_with_att('reindex'):
            plugin.reindex(es)
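
# Sketch of the plugin hook consumed by the loop above, assuming a
# DatalPluginPoint implementation only needs a `reindex` attribute that
# receives the shared ElasticsearchIndex wrapper; `build_documents` is a
# hypothetical helper, not part of the plugin API:
class DashboardReindexPlugin(DatalPluginPoint):
    def reindex(self, es):
        for doc in self.build_documents():  # hypothetical: yields index docs
            es.indexit(doc)
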
class Command(BaseCommand): help = "Index datasets." option_list = BaseCommand.option_list + ( make_option('--all', action='store_true', dest='all', default=False, help='Reindex resources'), make_option('--flush', action='store_true', dest='flush', default=False, help='flush index'), make_option('--only-datasets', action='store_true', dest='datasets', default=False, help='reindex datasets'), make_option('--only-datastreams', action='store_true', dest='datastreams', default=False, help='reindex datastreams'), make_option('--only-visualizations', action='store_true', dest='visualizations', default=False, help='reindex visualization'), make_option('--only-dashboards', action='store_true', dest='dashboards', default=False, help='reindex dashboards'), make_option('--debug', action='store_true', dest='debug', default=False, help='debug'), ) def handle(self, *args, **options): if not options['all'] and not options['datasets'] and not options[ 'datastreams'] and not options[ 'visualizations'] and not options['dashboards']: print "\nUse: " print "\n\treindex --<all|datasets|datastreams|visualizations|dashboards> [--flush] [--debug]\n\n" print "\t--all\t\t\treindex all resourses" print "\t--only-datasets\t\treindex datasets resourses" print "\t--only-datastreams\t\treindex datastreams resourses" print "\t--only-visualizations\treindex visualizations resourses" print "\t--only-dashboards\t\treindex dashboards resourses" print "\t--flush\t\t\tflush index" print "\t--debug\t\t\tdebug|verbose" print "\n" return if options['debug']: print "[Otions]" for i in options.keys(): print "\t", i.ljust(15), ": ", options[i] if options['flush']: # destruye el index ElasticsearchIndex().flush_index() # conectamos con elastic self.es = ElasticsearchIndex() # index resources if options['all']: options['datasets'] = True options['datastreams'] = True options['visualizations'] = True options['dashboards'] = True self.options = options self.index_datasets() self.index_datastreams() self.index_visualizations() self.index_dashboards() def index_datasets(self): if self.options['datasets']: if self.options['debug']: print "[Iniciando datasets]" for dataset in Dataset.objects.filter( last_published_revision__status=StatusChoices.PUBLISHED): try: datasetrevision = dataset.last_published_revision search_dao = DatasetSearchDAOFactory().create( datasetrevision) search_dao.add() except: print "[ERROR dt] Fallo al indexar Dataset. 
ID: {} Account: {}".format( dataset.id, dataset.user.account.name) def index_visualizations(self): if self.options['visualizations']: if self.options['debug']: print "[Iniciando visualizations]" for vz in Visualization.objects.filter( last_published_revision__status=StatusChoices.PUBLISHED): vz_revision = vz.last_published_revision search_dao = VisualizationSearchDAOFactory().create( vz_revision) try: search_dao.add() except VisualizationI18n.MultipleObjectsReturned: print "[ERROR vz] VisualizationI18n.MultipleObjectsReturned (vz.id= %s)" % vz.id continue except AttributeError: print "[ERROR vz] self.visualization_revision.visualization.datastream.last_published_revision == None (vz.id= %s, ds= %s)" % ( vz.id, vz.datastream.id) continue except: print "[ERROR vz] Probablemente VisualizationI18n.MultipleObjectsReturned (vz.id= %s)" % vz.id continue h = VisualizationHitsDAO(vz_revision) doc = { 'docid': "VZ::%s" % vz.guid, "type": "vz", "doc": { "fields": { "hits": h.count(), "web_hits": h.count(channel_type=0), "api_hits": h.count(channel_type=1) } } } try: self.es.update(doc) except: if self.options['debug']: print "[ERROR]: No se pudo ejecutar: ", doc def index_datastreams(self): if self.options['datastreams']: if self.options['debug']: print "[Iniciando datastreams]" for datastream in DataStream.objects.filter( last_published_revision__status=StatusChoices.PUBLISHED): try: datastreamrevision = datastream.last_published_revision datastream_rev = DataStreamDBDAO().get( datastreamrevision.user, datastream_revision_id=datastreamrevision.id, published=True) search_dao = DatastreamSearchDAOFactory().create( datastreamrevision) try: search_dao.add() except DatastreamI18n.MultipleObjectsReturned: print "[ERROR ds] DatastreamI18n.MultipleObjectsReturned (ds.id= %s)" % datastream.id continue except AttributeError: print "[ERROR ds] self.datastream.last_published_revision == None (ds= %s)" % datastream.id continue h = DatastreamHitsDAO(datastream_rev) doc = { 'docid': "DS::%s" % datastreamrevision.datastream.guid, "type": "ds", "doc": { "fields": { "hits": h.count(), "web_hits": h.count(channel_type=0), "api_hits": h.count(channel_type=1) } } } try: self.es.update(doc) except: if self.options['debug']: print "[ERROR]: No se pudo ejecutar: ", doc except: print "[ERROR ds] Fallo al indexar ID {} de la cuenta: {}".format( datastream.id, datastream.user.account.name) def index_dashboards(self): if self.options['dashboards']: if self.options['debug']: print "[Iniciando dashboards]" for plugin in DatalPluginPoint.get_active_with_att('reindex'): plugin.reindex(self.es)