Example #1
0
 def __init__(self, visualization):
     """Keep the visualization and resolve its id (dict key or attribute)."""
     self.visualization = visualization
     if isinstance(visualization, dict):
         resolved_id = visualization['visualization_id']
     else:
         resolved_id = visualization.visualization_id
     self.visualization_id = resolved_id
     self.logger = logging.getLogger(__name__)
     self.search_index = ElasticsearchIndex()
     self.cache = Cache()
Example #2
0
 def __init__(self, datastream):
     """Keep the datastream and resolve its id (dict key or `id` attribute)."""
     self.datastream = datastream
     if isinstance(datastream, dict):
         self.datastream_id = datastream['datastream_id']
     else:
         self.datastream_id = datastream.id
     self.search_index = ElasticsearchIndex()
     self.cache = Cache()
Example #3
0
    def handle(self, *args, **options):

        if not options['all'] and not options['datasets'] and not options[
                'datastreams'] and not options[
                    'visualizations'] and not options['dashboards']:
            print "\nUse: "
            print "\n\treindex --<all|datasets|datastreams|visualizations|dashboards> [--flush] [--debug]\n\n"
            print "\t--all\t\t\treindex all resourses"
            print "\t--only-datasets\t\treindex datasets resourses"
            print "\t--only-datastreams\t\treindex datastreams resourses"
            print "\t--only-visualizations\treindex visualizations resourses"
            print "\t--only-dashboards\t\treindex dashboards resourses"
            print "\t--flush\t\t\tflush index"
            print "\t--debug\t\t\tdebug|verbose"
            print "\n"
            return

        if options['debug']:
            print "[Otions]"
            for i in options.keys():
                print "\t", i.ljust(15), ": ", options[i]

        if options['flush']:
            # destruye el index
            ElasticsearchIndex().flush_index()

        # conectamos con elastic
        self.es = ElasticsearchIndex()

        # index resources
        if options['all']:
            options['datasets'] = True
            options['datastreams'] = True
            options['visualizations'] = True
            options['dashboards'] = True

        self.options = options

        self.index_datasets()
        self.index_datastreams()
        self.index_visualizations()
        self.index_dashboards()
Example #4
0
    def __init__(self):
        """Select the concrete search-index backend from settings.

        Raises SearchIndexNotFoundException for an unknown backend name.
        """
        backend = settings.USE_SEARCHINDEX
        logger.info('New %sIndex INIT' % backend)
        if backend == 'searchify':
            self.index = SearchifyIndex()
        elif backend == 'elasticsearch':
            self.index = ElasticsearchIndex()
        else:
            raise SearchIndexNotFoundException()
Example #5
0
    def update_timestamp(self, response, resource):
        """Best-effort update of the timestamp field on the "ds" search doc.

        Only applies when the resource is file-based, collected via URL and
        this DAO keys on datastream revisions. The timestamp comes from the
        response's ``fTimestamp`` when present, otherwise "now" in ms.
        Missing index entries are ignored; any other error is logged.
        """
        doubts = [
            'is_file' in resource and resource['is_file'],
            'collect_type' in resource and resource['collect_type'] == choices.CollectTypeChoices.URL,
            self.dao_pk == 'datastream_revision_id',
        ]
        if not all(doubts):
            return

        # idiom: isinstance instead of type comparison; `in` instead of .keys()
        if isinstance(response, dict) and "fTimestamp" in response:
            timestamp = response['fTimestamp']
        else:
            timestamp = int(round(time.time() * 1000))

        try:
            es = ElasticsearchIndex()
            hits = es.search(
                doc_type="ds",
                query={"query": {"match": {"revision_id": resource['revision_id']}}},
                fields="_id"
            )['hits']['hits']
            doc_id = hits[0]['_id']
            es.update({'doc': {'fields': {'timestamp': timestamp}},
                       'docid': doc_id, 'type': "ds"})
        except IndexError:
            # Revision not present in the index (probably unpublished).
            pass
        except Exception as e:
            logger.warning('[ENGINE COMMAND] error desconocido %s ' % str(e))
Example #6
0
 def __init__(self, revision):
     """Hold the revision and open a search-index connection."""
     self.search_index = ElasticsearchIndex()
     self.revision = revision
Example #7
0
 def __init__(self, dataset_revision):
     """Keep the dataset revision; set up logging and the search index."""
     self.dataset_revision = dataset_revision
     self.search_index = ElasticsearchIndex()
     self.logger = logging.getLogger(__name__)
Example #8
0
 def __init__(self, datastream_revision):
     """Keep the datastream revision and open a search-index connection."""
     self.search_index = ElasticsearchIndex()
     self.datastream_revision = datastream_revision
Example #9
0
 def __init__(self, visualization_revision):
     """Keep the visualization revision and open a search-index connection."""
     self.search_index = ElasticsearchIndex()
     self.visualization_revision = visualization_revision
Example #10
0
    def handle(self, *args, **options):
        """Rebuild the search index from every published resource.

        With ``--reindex`` the index is flushed and repopulated from the
        published datasets, visualizations and datastreams (including their
        hit counters). Plugins declaring 'reindex' always run at the end.
        """
        # Bind the connection up front: the plugin loop at the bottom runs
        # even without --reindex, and `es` was previously only assigned
        # inside the branch, raising NameError in that case.
        es = ElasticsearchIndex()

        if options['reindex']:
            # Destroy the index before repopulating it.
            es.flush_index()

            for dataset in Dataset.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
                dataset_revision = dataset.last_published_revision
                search_dao = DatasetSearchDAOFactory().create(dataset_revision)
                search_dao.add()

            for vz in Visualization.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
                vz_revision = vz.last_published_revision
                search_dao = VisualizationSearchDAOFactory().create(vz_revision)
                search_dao.add()

                hits_dao = VisualizationHitsDAO(vz_revision)
                doc = {
                    'docid': "VZ::%s" % vz.guid,
                    "type": "vz",
                    "doc": {
                        "fields": {
                            "hits": hits_dao.count(),
                            "web_hits": hits_dao.count(channel_type=0),
                            "api_hits": hits_dao.count(channel_type=1)
                        }
                    }
                }
                try:
                    es.update(doc)
                except Exception:
                    # Best-effort: a failed hit-counter update must not
                    # abort the whole reindex run.
                    pass

            # TODO: use the DAO's query method here instead.
            for datastream in DataStream.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
                datastream_revision = datastream.last_published_revision
                datastream_rev = DataStreamDBDAO().get(
                    datastream_revision.user.language,
                    datastream_revision_id=datastream_revision.id,
                    published=True
                )
                search_dao = DatastreamSearchDAOFactory().create(datastream_revision)
                search_dao.add()

                hits_dao = DatastreamHitsDAO(datastream_rev)
                doc = {
                    'docid': "DS::%s" % datastream_revision.datastream.guid,
                    "type": "ds",
                    "doc": {
                        "fields": {
                            "hits": hits_dao.count(),
                            "web_hits": hits_dao.count(channel_type=0),
                            "api_hits": hits_dao.count(channel_type=1)
                        }
                    }
                }
                try:
                    es.update(doc)
                except Exception:
                    # Best-effort, same rationale as above.
                    pass

        for plugin in DatalPluginPoint.get_active_with_att('reindex'):
            plugin.reindex(es)
Example #11
0
    def _request(self, query):
        url = self._get_url()
        response = None

        try:
            params = urllib.urlencode(query)

            self.logger.info("URL: %s Params: %s query: %s method: %s" %
                             (url, params, query, self.method))

            try:
                if self.method == 'GET':
                    response = urllib.urlopen(url + '?' + params)
                elif self.method == 'POST':
                    response = urllib.urlopen(url, params)
            except Exception, e:
                self.logger.error('Error trying to access to %s | %s (%s) ' %
                                  (url, str(params), str(e)))
                raise

            if response:
                if response.getcode() == 200:
                    ret = response.read()
                    if len(response.info().getplist()) > 0:
                        mimetype = '{0}; {1}'.format(
                            response.info().gettype(),
                            response.info().getplist()[0])
                    else:
                        mimetype = 'application; json'

                    # solo si es un json
                    if mimetype.split(";")[0] == 'application/json':
                        try:
                            # obtenemos el json para sacar el ftimestamp
                            aux = json.loads(ret)
                            if type(aux) == type(
                                {}) and "fTimestamp" in aux.keys():

                                pids = filter(
                                    None,
                                    map(lambda x: x[0] == 'pId' and x[1],
                                        query))
                                if len(pids) > 0:
                                    pId = pids[0]
                                    if settings.DEBUG:
                                        self.logger.info(
                                            '[ENGINE COMMAND] Salvamos el fTimestamp de %s (pId: %s)'
                                            % (aux["fTimestamp"], pId))

                                    try:
                                        es = ElasticsearchIndex()
                                        doc_id = es.search(
                                            doc_type="ds",
                                            query={
                                                "query": {
                                                    "match": {
                                                        "revision_id": pId
                                                    }
                                                }
                                            },
                                            fields="_id"
                                        )['hits']['hits'][0]['_id']
                                        es.update({
                                            'doc': {
                                                'fields': {
                                                    'timestamp':
                                                    aux['fTimestamp']
                                                }
                                            },
                                            'docid': doc_id,
                                            'type': "ds"
                                        })
                                    except IndexError:
                                        self.logger.warning(
                                            '[ENGINE COMMAND] revision id %s no existe en indexador, posiblemente no este publicado'
                                        )
                        except ValueError:
                            self.logger.error(
                                '[ENGINE COMMAND] ret no es un json')

                    return ret, mimetype

            raise IOError('Error code %d at %s+%s' %
                          (response.getcode(), url, str(params)))