Code example #1
File: visualizations.py Project: anukat2015/datal
def __init__(self, visualization):
    self.visualization = visualization
    if isinstance(self.visualization, dict):
        self.visualization_id = self.visualization['visualization_id']
    else:
        self.visualization_id = self.visualization.visualization_id
    self.search_index = ElasticsearchIndex()
    self.logger = logging.getLogger(__name__)
    self.cache = Cache()
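These hit-counting constructors accept either a plain dict or a model object; a minimal usage sketch, assuming this __init__ belongs to the VisualizationHitsDAO class shown in later examples:

# hypothetical usage (editor's sketch)
dao = VisualizationHitsDAO({'visualization_id': 42})  # dict form
print dao.visualization_id  # -> 42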
Code example #2
def __init__(self, datastream):
    self.datastream = datastream
    if isinstance(self.datastream, dict):
        self.datastream_id = self.datastream['datastream_id']
    else:
        self.datastream_id = self.datastream.id
    #self.datastream_revision = datastream.last_published_revision
    self.search_index = ElasticsearchIndex()
    self.cache = Cache()
Code example #3
File: visualizations.py Project: PhiRequiem/datal
class VisualizationElasticsearchDAO(VisualizationSearchDAO):
    """ class for manage access to datasets' elasticsearch documents """

    def __init__(self, visualization_revision):
        self.visualization_revision = visualization_revision
        self.search_index = ElasticsearchIndex()

    def add(self):
        return self.search_index.indexit(self._build_document())

    def remove(self):
        self.search_index.delete_documents([{"type": self._get_type(), "docid": self._get_id()}])
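A minimal usage sketch for this DAO, assuming a visualization revision object from the surrounding project:

# hypothetical usage (editor's sketch)
dao = VisualizationElasticsearchDAO(visualization_revision)
dao.add()     # index the revision's document
dao.remove()  # delete it again by type and docid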
Code example #4
class VisualizationElasticsearchDAO(VisualizationSearchDAO):
    """Class for managing access to visualizations' Elasticsearch documents."""

    def __init__(self, visualization_revision):
        self.visualization_revision = visualization_revision
        self.search_index = ElasticsearchIndex()

    def add(self):
        return self.search_index.indexit(self._build_document())

    def remove(self):
        self.search_index.delete_documents([{"type": self._get_type(), "docid": self._get_id()}])
Code example #5
File: datasets.py Project: PhiRequiem/datal
class DatasetElasticsearchDAO(DatasetSearchIndexDAO):
    """Class for managing access to datasets' Elasticsearch documents."""

    def __init__(self, dataset_revision):
        self.logger = logging.getLogger(__name__)
        self.dataset_revision = dataset_revision
        self.search_index = ElasticsearchIndex()

    def add(self):
        return self.search_index.indexit(self._build_document())

    def remove(self):
        self.search_index.delete_documents([{"type": self._get_type(), "docid": self._get_id()}])
Code example #6
File: datastreams.py Project: mxabierto/datal
class DatastreamElasticsearchDAO(DatastreamSearchDAO):
    """Class for managing access to datastreams' Elasticsearch documents."""

    def __init__(self, datastream_revision):
        self.datastream_revision = datastream_revision
        self.search_index = ElasticsearchIndex()

    def add(self):
        output = self.search_index.indexit(self._build_document())

        return (self.datastream_revision.id, self.datastream_revision.datastream.id, output)

    def remove(self):
        return self.search_index.delete_documents([{"type": self._get_type(), "docid": self._get_id()}])
Code example #7
File: datasets.py Project: anukat2015/datal
class DatasetElasticsearchDAO(DatasetSearchIndexDAO):
    """ class for manage access to datasets' ElasticSearch documents """
    def __init__(self, dataset_revision):
        self.logger = logging.getLogger(__name__)
        self.dataset_revision = dataset_revision
        self.search_index = ElasticsearchIndex()

    def add(self):
        return self.search_index.indexit(self._build_document())

    def remove(self):
        self.search_index.delete_documents([{
            "type": self._get_type(),
            "docid": self._get_id()
        }])
Code example #8
class DatastreamElasticsearchDAO(DatastreamSearchDAO):
    """ class for manage access to datastreams elasticsearch documents """
    def __init__(self, revision):
        self.revision = revision
        self.search_index = ElasticsearchIndex()

    def add(self):
        output = self.search_index.indexit(self._build_document())

        return (self.revision.id, self.revision.datastream.id, output)

    def remove(self):
        return self.search_index.delete_documents([{
            "type": self._get_type(),
            "docid": self._get_id()
        }])
Code example #9
File: visualizations.py Project: mxabierto/datal
def __init__(self, visualization):
    self.visualization = visualization
    if isinstance(self.visualization, dict):
        self.visualization_id = self.visualization['visualization_id']
    else:
        self.visualization_id = self.visualization.visualization_id
    self.search_index = ElasticsearchIndex()
    self.logger = logging.getLogger(__name__)
    self.cache = Cache()
Code example #10
File: datastreams.py Project: mxabierto/datal
def __init__(self, datastream):
    self.datastream = datastream
    if isinstance(self.datastream, dict):
        self.datastream_id = self.datastream['datastream_id']
    else:
        self.datastream_id = self.datastream.id
    #self.datastream_revision = datastream.last_published_revision
    self.search_index = ElasticsearchIndex()
    self.cache = Cache()
Code example #11
    def handle(self, *args, **options):

        if not options['all'] and not options['datasets'] and not options[
                'datastreams'] and not options[
                    'visualizations'] and not options['dashboards']:
            print "\nUse: "
            print "\n\treindex --<all|datasets|datastreams|visualizations|dashboards> [--flush] [--debug]\n\n"
            print "\t--all\t\t\treindex all resourses"
            print "\t--only-datasets\t\treindex datasets resourses"
            print "\t--only-datastreams\t\treindex datastreams resourses"
            print "\t--only-visualizations\treindex visualizations resourses"
            print "\t--only-dashboards\t\treindex dashboards resourses"
            print "\t--flush\t\t\tflush index"
            print "\t--debug\t\t\tdebug|verbose"
            print "\n"
            return

        if options['debug']:
            print "[Otions]"
            for i in options.keys():
                print "\t", i.ljust(15), ": ", options[i]

        if options['flush']:
            # destroy the index
            ElasticsearchIndex().flush_index()

        # connect to Elasticsearch
        self.es = ElasticsearchIndex()

        # index resources
        if options['all']:
            options['datasets'] = True
            options['datastreams'] = True
            options['visualizations'] = True
            options['dashboards'] = True

        self.options = options

        self.index_datasets()
        self.index_datastreams()
        self.index_visualizations()
        self.index_dashboards()
Code example #12
File: views.py Project: jnaudon/datal
    def update_timestamp(self, response, resource):
        doubts = [
            'is_file' in resource and resource['is_file'],  
            'collect_type' in resource and resource['collect_type'] == choices.CollectTypeChoices.URL,
            self.dao_pk == 'datastream_revision_id'
        ]
        if all(doubts):
            if isinstance(response, dict) and "fTimestamp" in response:
                timestamp = response['fTimestamp']
            else:
                timestamp = int(round(time.time() * 1000))

            try:
                es = ElasticsearchIndex()
                doc_id = es.search(doc_type="ds", query={"query": {"match": {"revision_id": resource['revision_id']}}}, fields="_id")['hits']['hits'][0]['_id']
                es.update({'doc': {'fields': {'timestamp': timestamp}}, 'docid': doc_id, 'type': "ds"})
            except IndexError:
                pass
            except Exception as e:
                logger.warning('[ENGINE COMMAND] unknown error %s' % str(e))
Code example #13
    def __init__(self):

        logger.info('New %sIndex INIT' % settings.USE_SEARCHINDEX)
        if settings.USE_SEARCHINDEX == 'searchify':
            self.index = SearchifyIndex()
        elif settings.USE_SEARCHINDEX == 'elasticsearch':
            self.index = ElasticsearchIndex()
#        elif settings.USE_SEARCHINDEX == 'test':
#            self.search_dao = DatastreamSearchDAO(datastream_revision)
        else:
            raise SearchIndexNotFoundException()
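The branch taken here is driven entirely by a Django setting; a minimal sketch of the corresponding settings entry (the setting name is taken from the code above, the value shown is an assumption for illustration):

# settings.py -- hypothetical sketch; 'elasticsearch' selects ElasticsearchIndex above
USE_SEARCHINDEX = 'elasticsearch'  # or 'searchify'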
Code example #14
File: index.py Project: anukat2015/datal
    def handle(self, *args, **options):

        if not options['all'] and not options['datasets'] and not options['datastreams'] and not options['visualizations'] and not options['dashboards']:
            print "\nUse: "
            print "\n\treindex --<all|datasets|datastreams|visualizations|dashboards> [--flush] [--debug]\n\n"
            print "\t--all\t\t\treindex all resourses"
            print "\t--only-datasets\t\treindex datasets resourses"
            print "\t--only-datastreams\t\treindex datastreams resourses"
            print "\t--only-visualizations\treindex visualizations resourses"
            print "\t--only-dashboards\t\treindex dashboards resourses"
            print "\t--flush\t\t\tflush index"
            print "\t--debug\t\t\tdebug|verbose"
            print "\n"
            return

        if options['debug']:
            print "[Otions]"
            for i in options.keys():
                print "\t",i.ljust(15),": ",options[i]

        if options['flush']:
            # destroy the index
            ElasticsearchIndex().flush_index()

        # connect to Elasticsearch
        self.es = ElasticsearchIndex()

        # index resources
        if options['all']:
            options['datasets'] = True
            options['datastreams'] = True
            options['visualizations'] = True
            options['dashboards'] = True

        self.options = options

        self.index_datasets()
        self.index_datastreams()
        self.index_visualizations()
        self.index_dashboards()
Code example #15
    def _request(self, query):
        url = self._get_url()
        response = None

        try:
            params = urllib.urlencode(query)

            self.logger.info("URL: %s Params: %s query: %s method: %s" %
                             (url, params, query, self.method))

            try:
                if self.method == 'GET':
                    response = urllib.urlopen(url + '?' + params)
                elif self.method == 'POST':
                    response = urllib.urlopen(url, params)
            except Exception, e:
                self.logger.error('Error trying to access to %s | %s (%s) ' %
                                  (url, str(params), str(e)))
                raise

            if response:
                if response.getcode() == 200:
                    ret = response.read()
                    if len(response.info().getplist()) > 0:
                        mimetype = '{0}; {1}'.format(
                            response.info().gettype(),
                            response.info().getplist()[0])
                    else:
                        mimetype = 'application/json'

                    # only if the response is JSON
                    if mimetype.split(";")[0] == 'application/json':
                        try:
                            # parse the JSON to extract the fTimestamp
                            aux = json.loads(ret)
                            if isinstance(aux, dict) and "fTimestamp" in aux:

                                pids = filter(
                                    None,
                                    map(lambda x: x[0] == 'pId' and x[1],
                                        query))
                                if len(pids) > 0:
                                    pId = pids[0]
                                    if settings.DEBUG:
                                        self.logger.info(
                                            '[ENGINE COMMAND] Saving the fTimestamp %s (pId: %s)'
                                            % (aux["fTimestamp"], pId))

                                    try:
                                        es = ElasticsearchIndex()
                                        doc_id = es.search(
                                            doc_type="ds",
                                            query={
                                                "query": {
                                                    "match": {
                                                        "revision_id": pId
                                                    }
                                                }
                                            },
                                            fields="_id"
                                        )['hits']['hits'][0]['_id']
                                        es.update({
                                            'doc': {
                                                'fields': {
                                                    'timestamp':
                                                    aux['fTimestamp']
                                                }
                                            },
                                            'docid': doc_id,
                                            'type': "ds"
                                        })
                                    except IndexError:
                                        self.logger.warning(
                                            '[ENGINE COMMAND] revision id %s not found in the index; probably not published'
                                            % pId)
                        except ValueError:
                            self.logger.error(
                                '[ENGINE COMMAND] response is not valid JSON')

                    return ret, mimetype

            raise IOError('Error code %d at %s+%s' %
                          (response.getcode(), url, str(params)))
Code example #16
File: datasets.py Project: anukat2015/datal
def __init__(self, dataset_revision):
    self.logger = logging.getLogger(__name__)
    self.dataset_revision = dataset_revision
    self.search_index = ElasticsearchIndex()
Code example #17
File: datastreams.py Project: mxabierto/datal
class DatastreamHitsDAO():
    """Class for managing access to hits in the DB and the search index."""

    doc_type = "ds"
    from_cache = False

    # cache TTL, 1 hour
    TTL = 3600

    CHANNEL_TYPE = ("web", "api")

    def __init__(self, datastream):
        self.datastream = datastream
        if isinstance(self.datastream, dict):
            self.datastream_id = self.datastream['datastream_id']
        else:
            self.datastream_id = self.datastream.id
        #self.datastream_revision = datastream.last_published_revision
        self.search_index = ElasticsearchIndex()
        self.cache = Cache()

    def add(self, channel_type):
        """Record a hit for the datastream."""

        # TODO: temporary fix for the DT-to-DAO migration. The problem is that
        # the datastream sometimes comes from a queryset and sometimes from a
        # DAO, and they are different objects.
        try:
            datastream_id = self.datastream.datastream_id
        except AttributeError:
            datastream_id = self.datastream['datastream_id']

        try:
            guid = self.datastream.guid
        except AttributeError:
            guid = self.datastream['guid']

        try:
            hit = DataStreamHits.objects.create(datastream_id=datastream_id, channel_type=channel_type)
        except IntegrityError:
            # is this the right exception?
            raise DataStreamNotFoundException()

        logger.info("DatastreamHitsDAO hit! (guid: %s)" % guid)

        # build the document that updates the index
        doc = {'docid': "DS::%s" % guid,
               "type": "ds",
               "script": "ctx._source.fields.%s_hits+=1" % self.CHANNEL_TYPE[channel_type]}

        return self.search_index.update(doc)

    def count(self, channel_type=ChannelTypes.WEB):
        return DataStreamHits.objects.filter(datastream_id=self.datastream_id, channel_type=channel_type).count()

    def count_by_days(self, day=30, channel_type=None):
        """Return the hits per day for the last `day` days, padding days
        without hits with zero."""

        # not sure this check is necessary
        if day < 1:
            return {}

        # start date of the window
        start_date = datetime.today() - timedelta(days=day)

        # keep only the date part
        truncate_date = connection.ops.date_trunc_sql('day', 'created_at')

        qs = DataStreamHits.objects.filter(datastream_id=self.datastream_id, created_at__gte=start_date)

        if channel_type:
            qs = qs.filter(channel_type=channel_type)

        hits = qs.extra(select={'_date': truncate_date, "fecha": 'DATE(created_at)'}).values("fecha").order_by("created_at").annotate(hits=Count("created_at"))

        control = [date.today() - timedelta(days=x) for x in range(day - 1, 0, -1)]
        control.append(date.today())

        for i in hits:
            try:
                control.remove(i['fecha'])
            except ValueError:
                pass

        hits = list(hits)

        for i in control:
            hits.append({"fecha": i, "hits": 0})

        hits = sorted(hits, key=lambda k: k['fecha'])

        # convert the dates to ISO format
        hits = map(self._date_isoformat, hits)
        return hits

    def _date_isoformat(self, row):
        row['fecha'] = row['fecha'].isoformat()
        return row
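A short usage sketch for the class above, assuming `ds` is a DataStream model instance; channel 0 is "web" and 1 is "api" per CHANNEL_TYPE:

# hypothetical usage (editor's sketch)
hits_dao = DatastreamHitsDAO(ds)
hits_dao.add(0)                      # record a web hit and bump web_hits in the index
print hits_dao.count()               # web hits (the default channel)
print hits_dao.count_by_days(day=7)  # per-day series for the last week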
Code example #18
File: visualizations.py Project: mxabierto/datal
class VisualizationHitsDAO():
    """Class for managing access to hits in the DB and the search index."""

    doc_type = "vz"
    from_cache = False

    # cache TTL, 1 hour
    TTL = 3600

    CHANNEL_TYPE = ("web", "api")

    def __init__(self, visualization):
        self.visualization = visualization
        if isinstance(self.visualization, dict):
            self.visualization_id = self.visualization['visualization_id']
        else:
            self.visualization_id = self.visualization.visualization_id
        self.search_index = ElasticsearchIndex()
        self.logger = logging.getLogger(__name__)
        self.cache = Cache()

    def add(self, channel_type):
        """Record a hit for the visualization."""

        # TODO: temporary fix for the DT-to-DAO migration. The problem is that
        # the object sometimes comes from a queryset and sometimes from a DAO,
        # and they are different objects.
        try:
            guid = self.visualization.guid
        except AttributeError:
            guid = self.visualization['guid']

        try:
            hit = VisualizationHits.objects.create(visualization_id=self.visualization_id, channel_type=channel_type)
        except IntegrityError:
            # is this the right exception?
            raise VisualizationNotFoundException()

        self.logger.info("VisualizationHitsDAO hit! (id: %s)" % self.visualization_id)

        # build the document that updates the index
        doc = {'docid': "%s::%s" % (self.doc_type.upper(), guid),
               "type": self.doc_type,
               "script": "ctx._source.fields.hits+=1",
               }
        self.search_index.update(doc)

        # now also bump the channel-specific hit counter
        doc['script'] = "ctx._source.fields.%s_hits+=1" % self.CHANNEL_TYPE[channel_type]

        return self.search_index.update(doc)

    def count(self, channel_type=None):
        """Return how many hits there are, per channel or overall.
            :param: channel_type: filter by channel
            :return: int"""

        query = VisualizationHits.objects.filter(visualization__id=self.visualization_id)

        if channel_type in (0, 1):
            query = query.filter(channel_type=channel_type)

        return query.count()

    def count_by_day(self, day):
        """Return the hits for a given day."""

        # if it's a datetime, keep only the date part
        if isinstance(day, datetime):
            day = day.date()

        cache_key = "%s_hits_%s_by_date_%s" % (self.doc_type, self.visualization.guid, str(day))

        hits = self._get_cache(cache_key)

        # if this particular day is not cached, store it; when the requested
        # day is today, refresh the cache but always use the DB value
        if not hits or day == date.today():
            hits = VisualizationHits.objects.filter(visualization=self.visualization, created_at__startswith=day).count()

            self._set_cache(cache_key, hits)

        return (day, hits)

    def count_by_days(self, day=30, channel_type=None):
        """Return the hits per day for the last `day` days, padding days
        without hits with zero."""

        # not sure this check is necessary
        if day < 1:
            return {}

        # start date of the window
        start_date = datetime.today() - timedelta(days=day)

        # keep only the date part
        truncate_date = connection.ops.date_trunc_sql('day', 'created_at')

        qs = VisualizationHits.objects.filter(visualization_id=self.visualization_id, created_at__gte=start_date)

        if channel_type:
            qs = qs.filter(channel_type=channel_type)

        hits = qs.extra(select={'_date': truncate_date, "fecha": 'DATE(created_at)'}).values("fecha").order_by("created_at").annotate(hits=Count("created_at"))

        control = [date.today() - timedelta(days=x) for x in range(day - 1, 0, -1)]
        control.append(date.today())

        for i in hits:
            try:
                control.remove(i['fecha'])
            except ValueError:
                pass

        hits = list(hits)

        for i in control:
            hits.append({"fecha": i, "hits": 0})

        hits = sorted(hits, key=lambda k: k['fecha'])

        # convert the dates to ISO format
        hits = map(self._date_isoformat, hits)

        return hits

    def _date_isoformat(self, row):
        row['fecha'] = row['fecha'].isoformat()
        return row
Code example #19
File: datastreams.py Project: PhiRequiem/datal
def __init__(self, datastream):
    self.datastream = datastream
    #self.datastream_revision = datastream.last_published_revision
    self.search_index = ElasticsearchIndex()
    self.logger = logging.getLogger(__name__)
    self.cache = Cache()
Code example #20
File: datastreams.py Project: mxabierto/datal
def __init__(self, datastream_revision):
    self.datastream_revision = datastream_revision
    self.search_index = ElasticsearchIndex()
Code example #21
File: datasets.py Project: PhiRequiem/datal
def __init__(self, dataset_revision):
    self.logger = logging.getLogger(__name__)
    self.dataset_revision = dataset_revision
    self.search_index = ElasticsearchIndex()
Code example #22
class DatastreamHitsDAO():
    """Class for managing access to hits in the DB and the search index."""

    doc_type = "ds"
    from_cache = False

    # cache TTL, 1 hour
    TTL = 3600

    CHANNEL_TYPE = ("web", "api")

    def __init__(self, datastream):
        self.datastream = datastream
        if isinstance(self.datastream, dict):
            self.datastream_id = self.datastream['datastream_id']
        else:
            self.datastream_id = self.datastream.id
        #self.datastream_revision = datastream.last_published_revision
        self.search_index = ElasticsearchIndex()
        self.cache = Cache()

    def add(self, channel_type):
        """Record a hit for the datastream."""

        # TODO: temporary fix for the DT-to-DAO migration. The problem is that
        # the datastream sometimes comes from a queryset and sometimes from a
        # DAO, and they are different objects.
        try:
            datastream_id = self.datastream.datastream_id
        except AttributeError:
            datastream_id = self.datastream['datastream_id']

        try:
            guid = self.datastream.guid
        except AttributeError:
            guid = self.datastream['guid']

        try:
            hit = DataStreamHits.objects.create(datastream_id=datastream_id,
                                                channel_type=channel_type)
        except IntegrityError:
            # is this the right exception?
            raise DataStreamNotFoundException()

        # build the document that updates the index
        doc = {
            'docid': "DS::%s" % guid,
            "type": "ds",
            "script": "ctx._source.fields.%s_hits+=1" % self.CHANNEL_TYPE[channel_type]
        }

        return self.search_index.update(doc)

    def count(self, channel_type=ChannelTypes.WEB):
        return DataStreamHits.objects.filter(
            datastream_id=self.datastream_id,
            channel_type=channel_type).count()

    def count_by_days(self, day=30, channel_type=None):
        """Return the hits per day for the last `day` days, padding days
        without hits with zero."""

        # not sure this check is necessary
        if day < 1:
            return {}

        # start date of the window
        start_date = now() - timedelta(days=day)

        # keep only the date part
        truncate_date = connection.ops.date_trunc_sql('day', 'created_at')

        qs = DataStreamHits.objects.filter(datastream_id=self.datastream_id,
                                           created_at__gte=start_date)

        if channel_type:
            qs = qs.filter(channel_type=channel_type)

        hits = qs.extra(select={
            '_date': truncate_date,
            "fecha": 'DATE(created_at)'
        }).values("fecha").order_by("created_at").annotate(
            hits=Count("created_at"))

        control = [
            now().date() - timedelta(days=x) for x in range(day - 1, 0, -1)
        ]
        control.append(now().date())

        for i in hits:
            try:
                control.remove(i['fecha'])
            except ValueError:
                pass

        hits = list(hits)

        for i in control:
            hits.append({"fecha": i, "hits": 0})

        hits = sorted(hits, key=lambda k: k['fecha'])

        # convert the dates to ISO format
        hits = map(self._date_isoformat, hits)
        return hits

    def _date_isoformat(self, row):
        row['fecha'] = row['fecha'].isoformat()
        return row
Code example #23
File: visualizations.py Project: anukat2015/datal
def __init__(self, visualization_revision):
    self.visualization_revision = visualization_revision
    self.search_index = ElasticsearchIndex()
Code example #24
File: visualizations.py Project: PhiRequiem/datal
class VisualizationHitsDAO:
    """class for manage access to Hits in DB and index"""

    doc_type = "vz"
    from_cache = False

    # cache TTL, 1 hour
    TTL = 3600

    def __init__(self, visualization):
        self.visualization = visualization
        self.search_index = ElasticsearchIndex()
        self.logger = logging.getLogger(__name__)
        self.cache = Cache()

    def add(self, channel_type):
        """agrega un hit al datastream. """

        try:
            hit = VisualizationHits.objects.create(
                visualization_id=self.visualization.visualization_id, channel_type=channel_type
            )
        except IntegrityError:
            # is this the right exception?
            raise VisualizationNotFoundException()

        self.logger.info("VisualizationHitsDAO hit! (guid: %s)" % (self.datastream.guid))

        # build the document that updates the index
        doc = {
            "docid": "%s::%s" % (self.doc_type.upper(), self.visualization.guid),
            "type": self.doc_type,
            "script": "ctx._source.fields.hits+=1",
        }

        return self.search_index.update(doc)

    def count(self):
        return VisualizationHits.objects.filter(visualization_id=self.visualization.visualization_id).count()

    def _get_cache(self, cache_key):

        cache = self.cache.get(cache_key)

        return cache

    def _set_cache(self, cache_key, value):

        return self.cache.set(cache_key, value, self.TTL)

    def count_by_day(self, day):
        """Return the hits for a given day."""

        # if it's a datetime, keep only the date part
        if isinstance(day, datetime):
            day = day.date()

        cache_key = "%s_hits_%s_by_date_%s" % (self.doc_type, self.visualization.guid, str(day))

        hits = self._get_cache(cache_key)

        # if this particular day is not cached, store it; when the requested
        # day is today, refresh the cache but always use the DB value
        if not hits or day == date.today():
            hits = VisualizationHits.objects.filter(
                visualization=self.visualization, created_at__startswith=day
            ).count()

            self._set_cache(cache_key, hits)

        return (day, hits)

    def count_by_days(self, day=30, channel_type=None):
        """Return the hits per day for the last `day` days, padding days
        without hits with zero."""

        # not sure this check is necessary
        if day < 1:
            return {}

        cache_key = "%s_hits_%s_%s" % (self.doc_type, self.visualization.guid, day)

        if channel_type:
            cache_key += "_channel_type_%s" % channel_type

        hits = self._get_cache(cache_key)

        # not in the cache, so compute it
        if not hits:
            # start date of the window
            start_date = datetime.today() - timedelta(days=day)

            # keep only the date part
            truncate_date = connection.ops.date_trunc_sql("day", "created_at")

            qs = VisualizationHits.objects.filter(visualization=self.visualization, created_at__gte=start_date)

            if channel_type:
                qs = qs.filter(channel_type=channel_type)

            hits = (
                qs.extra(select={"_date": truncate_date, "fecha": "DATE(created_at)"})
                .values("fecha")
                .order_by("created_at")
                .annotate(hits=Count("created_at"))
            )

            control = [date.today() - timedelta(days=x) for x in range(day - 1, 0, -1)]
            control.append(date.today())

            for i in hits:
                try:
                    control.remove(i["fecha"])
                except ValueError:
                    pass

            hits = list(hits)

            for i in control:
                hits.append({"fecha": i, "hits": 0})

            hits = sorted(hits, key=lambda k: k["fecha"])

            # convert the dates to ISO format
            hits = map(self._date_isoformat, hits)

            # store the result in the cache
            self._set_cache(cache_key, json.dumps(hits, cls=DjangoJSONEncoder))

            self.from_cache = False
        else:
            hits = json.loads(hits)
            self.from_cache = True

        return hits

    def _date_isoformat(self, row):
        row["fecha"] = row["fecha"].isoformat()
        return row
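Unlike the previous variants, this one memoizes count_by_days as JSON in the cache and reports cache hits through from_cache; a short sketch, assuming `vz` is a Visualization model instance:

# hypothetical usage (editor's sketch)
dao = VisualizationHitsDAO(vz)
series = dao.count_by_days(day=30)  # first call: queries the DB and caches JSON
series = dao.count_by_days(day=30)  # within the TTL: served from the cache
print dao.from_cache                # -> True after the cached call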
Code example #25
File: visualizations.py Project: PhiRequiem/datal
def __init__(self, visualization):
    self.visualization = visualization
    self.search_index = ElasticsearchIndex()
    self.logger = logging.getLogger(__name__)
    self.cache = Cache()
Code example #26
File: visualizations.py Project: anukat2015/datal
class VisualizationHitsDAO():
    """class for manage access to Hits in DB and index"""

    doc_type = "vz"
    from_cache = False

    # cache TTL, 1 hour
    TTL = 3600

    CHANNEL_TYPE = ("web", "api")

    def __init__(self, visualization):
        self.visualization = visualization
        if isinstance(self.visualization, dict):
            self.visualization_id = self.visualization['visualization_id']
        else:
            self.visualization_id = self.visualization.visualization_id
        self.search_index = ElasticsearchIndex()
        self.logger = logging.getLogger(__name__)
        self.cache = Cache()

    def add(self, channel_type):
        """Record a hit for the visualization.
        :param channel_type:
        """

        # TODO: temporary fix for the DT-to-DAO migration. The problem is that
        # the object sometimes comes from a queryset and sometimes from a DAO,
        # and they are different objects.
        try:
            guid = self.visualization.guid
        except AttributeError:
            guid = self.visualization['guid']

        try:
            hit = VisualizationHits.objects.create(
                visualization_id=self.visualization_id,
                channel_type=channel_type)
        except IntegrityError:
            # is this the right exception?
            raise VisualizationNotFoundException()

        self.logger.info("VisualizationHitsDAO hit! (id: %s)" %
                         (self.visualization_id))

        # build the document that updates the index
        doc = {
            'docid': "%s::%s" % (self.doc_type.upper(), guid),
            "type": self.doc_type,
            "script": "ctx._source.fields.hits+=1",
        }
        self.search_index.update(doc)

        # now also bump the channel-specific hit counter
        doc['script'] = "ctx._source.fields.%s_hits+=1" % self.CHANNEL_TYPE[
            channel_type]

        return self.search_index.update(doc)

    def count(self, channel_type=None):
        """Return how many hits there are, per channel or overall.
            :param: channel_type: filter by channel
            :return: int"""

        query = VisualizationHits.objects.filter(
            visualization__id=self.visualization_id)

        if channel_type in (0, 1):
            query = query.filter(channel_type=channel_type)

        return query.count()

    def count_by_day(self, day):
        """Return the hits for a given day.
        :param day:
        """

        # if it's a datetime, keep only the date part
        if isinstance(day, datetime):
            day = day.date()

        cache_key = "%s_hits_%s_by_date_%s" % (
            self.doc_type, self.visualization.guid, str(day))

        hits = self._get_cache(cache_key)

        # if this particular day is not cached, store it; when the requested
        # day is today, refresh the cache but always use the DB value
        if not hits or day == date.today():
            hits = VisualizationHits.objects.filter(
                visualization=self.visualization,
                created_at__startswith=day).count()

            self._set_cache(cache_key, hits)

        return (day, hits)

    def count_by_days(self, day=30, channel_type=None):
        """Return the hits per day for the last `day` days, padding days
        without hits with zero.
        :param channel_type:
        :param day:
        """

        # not sure this check is necessary
        if day < 1:
            return {}

        # start date of the window
        start_date = datetime.today() - timedelta(days=day)

        # keep only the date part
        truncate_date = connection.ops.date_trunc_sql('day', 'created_at')

        qs = VisualizationHits.objects.filter(
            visualization_id=self.visualization_id, created_at__gte=start_date)

        if channel_type:
            qs = qs.filter(channel_type=channel_type)

        hits = qs.extra(select={
            '_date': truncate_date,
            "fecha": 'DATE(created_at)'
        }).values("fecha").order_by("created_at").annotate(
            hits=Count("created_at"))

        control = [
            date.today() - timedelta(days=x) for x in range(day - 1, 0, -1)
        ]
        control.append(date.today())

        for i in hits:
            try:
                control.remove(i['fecha'])
            except ValueError:
                pass

        hits = list(hits)

        for i in control:
            hits.append({"fecha": i, "hits": 0})

        hits = sorted(hits, key=lambda k: k['fecha'])

        # convert the dates to ISO format
        hits = map(self._date_isoformat, hits)

        return hits

    def _date_isoformat(self, row):
        row['fecha'] = row['fecha'].isoformat()
        return row
Code example #27
File: visualizations.py Project: PhiRequiem/datal
def __init__(self, visualization_revision):
    self.visualization_revision = visualization_revision
    self.search_index = ElasticsearchIndex()
Code example #28
File: datastreams.py Project: Junar/datal
def __init__(self, datastream):
    self.datastream = datastream
    #self.datastream_revision = datastream.last_published_revision
    self.search_index = ElasticsearchIndex()
    self.cache = Cache()
Code example #29
File: datastreams.py Project: anukat2015/datal
def __init__(self, datastream_revision):
    self.datastream_revision = datastream_revision
    self.search_index = ElasticsearchIndex()
Code example #30
def __init__(self, revision):
    self.revision = revision
    self.search_index = ElasticsearchIndex()
Code example #31
File: index.py Project: anukat2015/datal
class Command(BaseCommand):
    help = "Index datasets."

    option_list = BaseCommand.option_list + (
        make_option('--all',
            action='store_true',
            dest='all',
            default=False,
            help='Reindex resources'),
        make_option('--flush',
            action='store_true',
            dest='flush',
            default=False,
            help='flush index'),
        make_option('--only-datasets',
            action='store_true',
            dest='datasets',
            default=False,
            help='reindex datasets'),
        make_option('--only-datastreams',
            action='store_true',
            dest='datastreams',
            default=False,
            help='reindex datastreams'),
        make_option('--only-visualizations',
            action='store_true',
            dest='visualizations',
            default=False,
            help='reindex visualization'),
        make_option('--only-dashboards',
            action='store_true',
            dest='dashboards',
            default=False,
            help='reindex dashboards'),
        make_option('--debug',
            action='store_true',
            dest='debug',
            default=False,
            help='debug'),
    )

    def handle(self, *args, **options):

        if not options['all'] and not options['datasets'] and not options['datastreams'] and not options['visualizations'] and not options['dashboards']:
            print "\nUse: "
            print "\n\treindex --<all|datasets|datastreams|visualizations|dashboards> [--flush] [--debug]\n\n"
            print "\t--all\t\t\treindex all resourses"
            print "\t--only-datasets\t\treindex datasets resourses"
            print "\t--only-datastreams\t\treindex datastreams resourses"
            print "\t--only-visualizations\treindex visualizations resourses"
            print "\t--only-dashboards\t\treindex dashboards resourses"
            print "\t--flush\t\t\tflush index"
            print "\t--debug\t\t\tdebug|verbose"
            print "\n"
            return

        if options['debug']:
            print "[Otions]"
            for i in options.keys():
                print "\t",i.ljust(15),": ",options[i]

        if options['flush']:
            # destroy the index
            ElasticsearchIndex().flush_index()

        # connect to Elasticsearch
        self.es = ElasticsearchIndex()

        # index resources
        if options['all']:
            options['datasets'] = True
            options['datastreams'] = True
            options['visualizations'] = True
            options['dashboards'] = True

        self.options = options

        self.index_datasets()
        self.index_datastreams()
        self.index_visualizations()
        self.index_dashboards()

    def index_datasets(self):
        if self.options['datasets']:
            if self.options['debug']: print "[Iniciando datasets]"
            for dataset in Dataset.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
                try:
                    datasetrevision = dataset.last_published_revision
                    search_dao = DatasetSearchDAOFactory().create(datasetrevision)
                    search_dao.add()
                except:
                    print "[ERROR dt] Fallo al indexar Dataset. ID: {} Account: {}".format(dataset.id, dataset.user.account.name)

    def index_visualizations(self):
        if self.options['visualizations']:
            if self.options['debug']: print "[Iniciando visualizations]"
            for vz in Visualization.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
                vz_revision = vz.last_published_revision
                search_dao = VisualizationSearchDAOFactory().create(vz_revision)
                try:
                    search_dao.add()
                except VisualizationI18n.MultipleObjectsReturned:
                    print "[ERROR vz] VisualizationI18n.MultipleObjectsReturned (vz.id= %s)" % vz.id
                    continue
                except AttributeError:
                    print "[ERROR vz] self.visualization_revision.visualization.datastream.last_published_revision == None (vz.id= %s, ds= %s)" % (vz.id, vz.datastream.id)
                    continue
                except:
                    print "[ERROR vz] Probablemente VisualizationI18n.MultipleObjectsReturned (vz.id= %s)" % vz.id
                    continue

                h = VisualizationHitsDAO(vz_revision)

                doc = {
                    'docid': "VZ::%s" % vz.guid,
                    "type": "vz",
                    "doc": {
                        "fields": {
                            "hits": h.count(),
                            "web_hits": h.count(channel_type=0),
                            "api_hits": h.count(channel_type=1)
                        }
                    }
                }
                try:
                    self.es.update(doc)
                except:
                    if self.options['debug']: print "[ERROR]: No se pudo ejecutar: ",doc

    def index_datastreams(self):
        if self.options['datastreams']:
            if self.options['debug']: print "[Iniciando datastreams]"
            for datastream in DataStream.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
                try:
                    datastreamrevision = datastream.last_published_revision
                    datastream_rev = DataStreamDBDAO().get(datastreamrevision.user,
                        datastream_revision_id=datastreamrevision.id,
                        published=True
                    )
                    search_dao = DatastreamSearchDAOFactory().create(datastreamrevision)
                    try:
                        search_dao.add()
                    except DatastreamI18n.MultipleObjectsReturned:
                        print "[ERROR ds] DatastreamI18n.MultipleObjectsReturned (ds.id= %s)" % datastream.id
                        continue
                    except AttributeError:
                        print "[ERROR ds] self.datastream.last_published_revision == None (ds= %s)" % datastream.id
                        continue

                    h = DatastreamHitsDAO(datastream_rev)

                    doc = {
                        'docid': "DS::%s" % datastreamrevision.datastream.guid,
                        "type": "ds",
                        "doc": {
                            "fields": {
                                "hits": h.count(),
                                "web_hits": h.count(channel_type=0),
                                "api_hits": h.count(channel_type=1)
                            }
                        }
                    }
                    try:
                        self.es.update(doc)
                    except:
                        if self.options['debug']: print "[ERROR]: No se pudo ejecutar: ",doc
                except:
                    print "[ERROR ds] Fallo al indexar ID {} de la cuenta: {}".format(datastream.id, datastream.user.account.name)

    def index_dashboards(self):
        if self.options['dashboards']:
            if self.options['debug']: print "[Iniciando dashboards]"
            for plugin in DatalPluginPoint.get_active_with_att('reindex'):
                plugin.reindex(self.es)
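The command can also be driven from code; a sketch, assuming the file is registered under management/commands/index.py so Django exposes it as `index`:

# hypothetical invocation (editor's sketch)
from django.core.management import call_command
call_command('index', all=True, flush=True, debug=True)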
Code example #32
File: index.py Project: berserkwarwolf/datal
    def handle(self, *args, **options):

        # index resources
        if options['reindex']:

            # destroy the index
            ElasticsearchIndex().flush_index()
            es = ElasticsearchIndex()

            for dataset in Dataset.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
                datasetrevision = dataset.last_published_revision
                search_dao = DatasetSearchDAOFactory().create(datasetrevision)
                search_dao.add()

            for vz in Visualization.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
                vz_revision = vz.last_published_revision
                search_dao = VisualizationSearchDAOFactory().create(vz_revision)
                search_dao.add()

                h = VisualizationHitsDAO(vz_revision)

                doc = {
                    'docid': "VZ::%s" % vz.guid,
                    "type": "vz",
                    "doc": {
                        "fields": {
                            "hits": h.count(),
                            "web_hits": h.count(channel_type=0),
                            "api_hits": h.count(channel_type=1)
                        }
                    }
                }
                try:
                    es.update(doc)
                except:
                    pass

            # TODO: we should use the DAO's query method
            for datastream in DataStream.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
                datastreamrevision = datastream.last_published_revision
                datastream_rev = DataStreamDBDAO().get(
                    datastreamrevision.user.language,
                    datastream_revision_id=datastreamrevision.id,
                    published=True
                )
                search_dao = DatastreamSearchDAOFactory().create(datastreamrevision)
                search_dao.add()

                h = DatastreamHitsDAO(datastream_rev)

                doc = {
                    'docid': "DS::%s" % datastreamrevision.datastream.guid,
                    "type": "ds",
                    "doc": {
                        "fields": {
                            "hits": h.count(),
                            "web_hits": h.count(channel_type=0),
                            "api_hits": h.count(channel_type=1)
                        }
                    }
                }
                try:
                    es.update(doc)
                except:
                    pass

            for plugin in DatalPluginPoint.get_active_with_att('reindex'):
                plugin.reindex(es)
Code example #33
class Command(BaseCommand):
    help = "Index datasets."

    option_list = BaseCommand.option_list + (
        make_option('--all',
                    action='store_true',
                    dest='all',
                    default=False,
                    help='Reindex resources'),
        make_option('--flush',
                    action='store_true',
                    dest='flush',
                    default=False,
                    help='flush index'),
        make_option('--only-datasets',
                    action='store_true',
                    dest='datasets',
                    default=False,
                    help='reindex datasets'),
        make_option('--only-datastreams',
                    action='store_true',
                    dest='datastreams',
                    default=False,
                    help='reindex datastreams'),
        make_option('--only-visualizations',
                    action='store_true',
                    dest='visualizations',
                    default=False,
                    help='reindex visualization'),
        make_option('--only-dashboards',
                    action='store_true',
                    dest='dashboards',
                    default=False,
                    help='reindex dashboards'),
        make_option('--debug',
                    action='store_true',
                    dest='debug',
                    default=False,
                    help='debug'),
    )

    def handle(self, *args, **options):

        if not options['all'] and not options['datasets'] and not options[
                'datastreams'] and not options[
                    'visualizations'] and not options['dashboards']:
            print "\nUse: "
            print "\n\treindex --<all|datasets|datastreams|visualizations|dashboards> [--flush] [--debug]\n\n"
            print "\t--all\t\t\treindex all resourses"
            print "\t--only-datasets\t\treindex datasets resourses"
            print "\t--only-datastreams\t\treindex datastreams resourses"
            print "\t--only-visualizations\treindex visualizations resourses"
            print "\t--only-dashboards\t\treindex dashboards resourses"
            print "\t--flush\t\t\tflush index"
            print "\t--debug\t\t\tdebug|verbose"
            print "\n"
            return

        if options['debug']:
            print "[Otions]"
            for i in options.keys():
                print "\t", i.ljust(15), ": ", options[i]

        if options['flush']:
            # destroy the index
            ElasticsearchIndex().flush_index()

        # connect to Elasticsearch
        self.es = ElasticsearchIndex()

        # index resources
        if options['all']:
            options['datasets'] = True
            options['datastreams'] = True
            options['visualizations'] = True
            options['dashboards'] = True

        self.options = options

        self.index_datasets()
        self.index_datastreams()
        self.index_visualizations()
        self.index_dashboards()

    def index_datasets(self):
        if self.options['datasets']:
            if self.options['debug']: print "[Iniciando datasets]"
            for dataset in Dataset.objects.filter(
                    last_published_revision__status=StatusChoices.PUBLISHED):
                try:
                    datasetrevision = dataset.last_published_revision
                    search_dao = DatasetSearchDAOFactory().create(
                        datasetrevision)
                    search_dao.add()
                except:
                    print "[ERROR dt] Fallo al indexar Dataset. ID: {} Account: {}".format(
                        dataset.id, dataset.user.account.name)

    def index_visualizations(self):
        if self.options['visualizations']:
            if self.options['debug']: print "[Iniciando visualizations]"
            for vz in Visualization.objects.filter(
                    last_published_revision__status=StatusChoices.PUBLISHED):
                vz_revision = vz.last_published_revision
                search_dao = VisualizationSearchDAOFactory().create(
                    vz_revision)
                try:
                    search_dao.add()
                except VisualizationI18n.MultipleObjectsReturned:
                    print "[ERROR vz] VisualizationI18n.MultipleObjectsReturned (vz.id= %s)" % vz.id
                    continue
                except AttributeError:
                    print "[ERROR vz] self.visualization_revision.visualization.datastream.last_published_revision == None (vz.id= %s, ds= %s)" % (
                        vz.id, vz.datastream.id)
                    continue
                except:
                    print "[ERROR vz] Probablemente VisualizationI18n.MultipleObjectsReturned (vz.id= %s)" % vz.id
                    continue

                h = VisualizationHitsDAO(vz_revision)

                doc = {
                    'docid': "VZ::%s" % vz.guid,
                    "type": "vz",
                    "doc": {
                        "fields": {
                            "hits": h.count(),
                            "web_hits": h.count(channel_type=0),
                            "api_hits": h.count(channel_type=1)
                        }
                    }
                }
                try:
                    self.es.update(doc)
                except:
                    if self.options['debug']:
                        print "[ERROR]: No se pudo ejecutar: ", doc

    def index_datastreams(self):
        if self.options['datastreams']:
            if self.options['debug']: print "[Iniciando datastreams]"
            for datastream in DataStream.objects.filter(
                    last_published_revision__status=StatusChoices.PUBLISHED):
                try:
                    datastreamrevision = datastream.last_published_revision
                    datastream_rev = DataStreamDBDAO().get(
                        datastreamrevision.user,
                        datastream_revision_id=datastreamrevision.id,
                        published=True)
                    search_dao = DatastreamSearchDAOFactory().create(
                        datastreamrevision)
                    try:
                        search_dao.add()
                    except DatastreamI18n.MultipleObjectsReturned:
                        print "[ERROR ds] DatastreamI18n.MultipleObjectsReturned (ds.id= %s)" % datastream.id
                        continue
                    except AttributeError:
                        print "[ERROR ds] self.datastream.last_published_revision == None (ds= %s)" % datastream.id
                        continue

                    h = DatastreamHitsDAO(datastream_rev)

                    doc = {
                        'docid': "DS::%s" % datastreamrevision.datastream.guid,
                        "type": "ds",
                        "doc": {
                            "fields": {
                                "hits": h.count(),
                                "web_hits": h.count(channel_type=0),
                                "api_hits": h.count(channel_type=1)
                            }
                        }
                    }
                    try:
                        self.es.update(doc)
                    except:
                        if self.options['debug']:
                            print "[ERROR]: No se pudo ejecutar: ", doc
                except:
                    print "[ERROR ds] Fallo al indexar ID {} de la cuenta: {}".format(
                        datastream.id, datastream.user.account.name)

    def index_dashboards(self):
        if self.options['dashboards']:
            if self.options['debug']: print "[Iniciando dashboards]"
            for plugin in DatalPluginPoint.get_active_with_att('reindex'):
                plugin.reindex(self.es)