def __init__(self, visualization):
    """Keep the visualization and resolve its id.

    ``visualization`` may be a plain dict (uses the 'visualization_id'
    key) or a model-like object (uses the ``visualization_id`` attribute).
    """
    self.visualization = visualization
    if isinstance(visualization, dict):
        resolved_id = visualization['visualization_id']
    else:
        resolved_id = visualization.visualization_id
    self.visualization_id = resolved_id
    self.search_index = ElasticsearchIndex()
    self.logger = logging.getLogger(__name__)
    self.cache = Cache()
def __init__(self, datastream):
    """Keep the datastream and resolve its id.

    ``datastream`` may be a plain dict (uses the 'datastream_id' key)
    or a model-like object (uses the ``id`` attribute).
    """
    self.datastream = datastream
    if isinstance(datastream, dict):
        resolved_id = datastream['datastream_id']
    else:
        resolved_id = datastream.id
    self.datastream_id = resolved_id
    self.search_index = ElasticsearchIndex()
    self.cache = Cache()
def handle(self, *args, **options): if not options['all'] and not options['datasets'] and not options[ 'datastreams'] and not options[ 'visualizations'] and not options['dashboards']: print "\nUse: " print "\n\treindex --<all|datasets|datastreams|visualizations|dashboards> [--flush] [--debug]\n\n" print "\t--all\t\t\treindex all resourses" print "\t--only-datasets\t\treindex datasets resourses" print "\t--only-datastreams\t\treindex datastreams resourses" print "\t--only-visualizations\treindex visualizations resourses" print "\t--only-dashboards\t\treindex dashboards resourses" print "\t--flush\t\t\tflush index" print "\t--debug\t\t\tdebug|verbose" print "\n" return if options['debug']: print "[Otions]" for i in options.keys(): print "\t", i.ljust(15), ": ", options[i] if options['flush']: # destruye el index ElasticsearchIndex().flush_index() # conectamos con elastic self.es = ElasticsearchIndex() # index resources if options['all']: options['datasets'] = True options['datastreams'] = True options['visualizations'] = True options['dashboards'] = True self.options = options self.index_datasets() self.index_datastreams() self.index_visualizations() self.index_dashboards()
def __init__(self):
    """Pick the search-index backend named by settings.USE_SEARCHINDEX.

    Raises SearchIndexNotFoundException for an unknown backend name.
    """
    logger.info('New %sIndex INIT' % settings.USE_SEARCHINDEX)
    backend = settings.USE_SEARCHINDEX
    if backend == 'searchify':
        self.index = SearchifyIndex()
    elif backend == 'elasticsearch':
        self.index = ElasticsearchIndex()
    else:
        raise SearchIndexNotFoundException()
def update_timestamp(self, response, resource):
    """Persist a fresh 'timestamp' field into the ES doc of a datastream revision.

    Runs only when the resource is a file-based, URL-collected datastream
    revision (checked against this DAO's primary key name). The timestamp
    comes from the response's 'fTimestamp' when available, otherwise the
    current time in milliseconds. Missing index documents are ignored;
    any other indexing error is logged and swallowed (best effort).
    """
    preconditions = [
        'is_file' in resource and resource['is_file'],
        'collect_type' in resource and
        resource['collect_type'] == choices.CollectTypeChoices.URL,
        self.dao_pk == 'datastream_revision_id',
    ]
    if not all(preconditions):
        return

    # prefer the engine-provided fTimestamp; fall back to "now" in ms
    if isinstance(response, dict) and "fTimestamp" in response:
        timestamp = response['fTimestamp']
    else:
        timestamp = int(round(time.time() * 1000))

    try:
        es = ElasticsearchIndex()
        doc_id = es.search(
            doc_type="ds",
            query={"query": {"match": {"revision_id": resource['revision_id']}}},
            fields="_id"
        )['hits']['hits'][0]['_id']
        es.update({'doc': {'fields': {'timestamp': timestamp}},
                   'docid': doc_id,
                   'type': "ds"})
    except IndexError:
        # no indexed document for this revision — nothing to update
        pass
    except Exception as e:
        logger.warning('[ENGINE COMMAND] error desconocido %s ' % str(e))
def __init__(self, revision):
    """Keep the revision and open a search-index handle."""
    self.search_index = ElasticsearchIndex()
    self.revision = revision
def __init__(self, dataset_revision):
    """Keep the dataset revision plus logger and search-index helpers."""
    self.dataset_revision = dataset_revision
    self.search_index = ElasticsearchIndex()
    self.logger = logging.getLogger(__name__)
def __init__(self, datastream_revision):
    """Keep the datastream revision and open a search-index handle."""
    self.search_index = ElasticsearchIndex()
    self.datastream_revision = datastream_revision
def __init__(self, visualization_revision):
    """Keep the visualization revision and open a search-index handle."""
    self.search_index = ElasticsearchIndex()
    self.visualization_revision = visualization_revision
def handle(self, *args, **options):
    """Rebuild the search index from every published dataset, visualization
    and datastream, then let active plugins reindex themselves.

    Hit counters (total / web / api) are pushed as partial updates for
    visualizations and datastreams; a failed counter update is tolerated
    so one broken document cannot abort the whole reindex.
    """
    if not options['reindex']:
        return

    # destroy the index before rebuilding it
    ElasticsearchIndex().flush_index()
    es = ElasticsearchIndex()

    def _hits_doc(docid, doc_type, hits_dao):
        # partial-update document carrying the hit counters for one resource
        return {
            'docid': docid,
            'type': doc_type,
            'doc': {
                'fields': {
                    'hits': hits_dao.count(),
                    'web_hits': hits_dao.count(channel_type=0),
                    'api_hits': hits_dao.count(channel_type=1),
                }
            }
        }

    for dataset in Dataset.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
        datasetrevision = dataset.last_published_revision
        search_dao = DatasetSearchDAOFactory().create(datasetrevision)
        search_dao.add()

    for vz in Visualization.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
        vz_revision = vz.last_published_revision
        search_dao = VisualizationSearchDAOFactory().create(vz_revision)
        search_dao.add()
        doc = _hits_doc("VZ::%s" % vz.guid, "vz", VisualizationHitsDAO(vz_revision))
        try:
            es.update(doc)
        except Exception:
            # best effort: a failed counter update must not abort the reindex
            pass

    # TODO: use the DAO's query method instead of iterating the queryset
    for datastream in DataStream.objects.filter(last_published_revision__status=StatusChoices.PUBLISHED):
        datastreamrevision = datastream.last_published_revision
        datastream_rev = DataStreamDBDAO().get(
            datastreamrevision.user.language,
            datastream_revision_id=datastreamrevision.id,
            published=True
        )
        search_dao = DatastreamSearchDAOFactory().create(datastreamrevision)
        search_dao.add()
        doc = _hits_doc("DS::%s" % datastreamrevision.datastream.guid, "ds",
                        DatastreamHitsDAO(datastream_rev))
        try:
            es.update(doc)
        except Exception:
            # best effort: a failed counter update must not abort the reindex
            pass

    for plugin in DatalPluginPoint.get_active_with_att('reindex'):
        plugin.reindex(es)
def _request(self, query): url = self._get_url() response = None try: params = urllib.urlencode(query) self.logger.info("URL: %s Params: %s query: %s method: %s" % (url, params, query, self.method)) try: if self.method == 'GET': response = urllib.urlopen(url + '?' + params) elif self.method == 'POST': response = urllib.urlopen(url, params) except Exception, e: self.logger.error('Error trying to access to %s | %s (%s) ' % (url, str(params), str(e))) raise if response: if response.getcode() == 200: ret = response.read() if len(response.info().getplist()) > 0: mimetype = '{0}; {1}'.format( response.info().gettype(), response.info().getplist()[0]) else: mimetype = 'application; json' # solo si es un json if mimetype.split(";")[0] == 'application/json': try: # obtenemos el json para sacar el ftimestamp aux = json.loads(ret) if type(aux) == type( {}) and "fTimestamp" in aux.keys(): pids = filter( None, map(lambda x: x[0] == 'pId' and x[1], query)) if len(pids) > 0: pId = pids[0] if settings.DEBUG: self.logger.info( '[ENGINE COMMAND] Salvamos el fTimestamp de %s (pId: %s)' % (aux["fTimestamp"], pId)) try: es = ElasticsearchIndex() doc_id = es.search( doc_type="ds", query={ "query": { "match": { "revision_id": pId } } }, fields="_id" )['hits']['hits'][0]['_id'] es.update({ 'doc': { 'fields': { 'timestamp': aux['fTimestamp'] } }, 'docid': doc_id, 'type': "ds" }) except IndexError: self.logger.warning( '[ENGINE COMMAND] revision id %s no existe en indexador, posiblemente no este publicado' ) except ValueError: self.logger.error( '[ENGINE COMMAND] ret no es un json') return ret, mimetype raise IOError('Error code %d at %s+%s' % (response.getcode(), url, str(params)))