Esempio n. 1
0
    def update_timestamp(self, response, resource):
        """Store the resource's latest timestamp on its search-index document.

        The index is only touched when all of these hold:

        * ``resource['is_file']`` is present and truthy,
        * ``resource['collect_type']`` is present and equals
          ``choices.CollectTypeChoices.URL``,
        * ``self.dao_pk`` is ``'datastream_revision_id'``.

        :param response: object the timestamp is taken from. When it is a
            dict (or dict subclass) containing ``fTimestamp`` that value is
            used; otherwise the current time in epoch milliseconds is used.
        :param resource: mapping describing the resource; ``revision_id`` is
            read from it to look up the document of type ``"ds"``.
        :returns: ``None``. Failures while talking to the index are not
            propagated: a missing document is ignored and any other error
            is logged as a warning.
        """
        # Guard clauses: bail out as soon as one precondition fails, so the
        # remaining ones (and the lookup of `choices`) are not evaluated.
        if not ('is_file' in resource and resource['is_file']):
            return
        if not ('collect_type' in resource
                and resource['collect_type'] == choices.CollectTypeChoices.URL):
            return
        if self.dao_pk != 'datastream_revision_id':
            return

        # isinstance (rather than an exact type comparison) also accepts
        # dict subclasses such as OrderedDict.
        if isinstance(response, dict) and "fTimestamp" in response:
            timestamp = response['fTimestamp']
        else:
            # Fall back to "now", expressed in milliseconds since the epoch.
            timestamp = int(round(time.time() * 1000))

        try:
            es = ElasticsearchIndex()
            lookup = {"query": {"match": {"revision_id": resource['revision_id']}}}
            found = es.search(doc_type="ds", query=lookup, fields="_id")
            # Raises IndexError when the search returned no hits.
            doc_id = found['hits']['hits'][0]['_id']
            es.update({
                'doc': {'fields': {'timestamp': timestamp}},
                'docid': doc_id,
                'type': "ds",
            })
        except IndexError:
            # No document matched this revision id: nothing to update, and
            # this is deliberately not treated as an error.
            pass
        except Exception as e:
            # Best effort: an index failure must not break the caller.
            logger.warning('[ENGINE COMMAND] error desconocido %s ', e)
Esempio n. 2
0
    def _request(self, query):
        url = self._get_url()
        response = None

        try:
            params = urllib.urlencode(query)

            self.logger.info("URL: %s Params: %s query: %s method: %s" %
                             (url, params, query, self.method))

            try:
                if self.method == 'GET':
                    response = urllib.urlopen(url + '?' + params)
                elif self.method == 'POST':
                    response = urllib.urlopen(url, params)
            except Exception, e:
                self.logger.error('Error trying to access to %s | %s (%s) ' %
                                  (url, str(params), str(e)))
                raise

            if response:
                if response.getcode() == 200:
                    ret = response.read()
                    if len(response.info().getplist()) > 0:
                        mimetype = '{0}; {1}'.format(
                            response.info().gettype(),
                            response.info().getplist()[0])
                    else:
                        mimetype = 'application; json'

                    # solo si es un json
                    if mimetype.split(";")[0] == 'application/json':
                        try:
                            # obtenemos el json para sacar el ftimestamp
                            aux = json.loads(ret)
                            if type(aux) == type(
                                {}) and "fTimestamp" in aux.keys():

                                pids = filter(
                                    None,
                                    map(lambda x: x[0] == 'pId' and x[1],
                                        query))
                                if len(pids) > 0:
                                    pId = pids[0]
                                    if settings.DEBUG:
                                        self.logger.info(
                                            '[ENGINE COMMAND] Salvamos el fTimestamp de %s (pId: %s)'
                                            % (aux["fTimestamp"], pId))

                                    try:
                                        es = ElasticsearchIndex()
                                        doc_id = es.search(
                                            doc_type="ds",
                                            query={
                                                "query": {
                                                    "match": {
                                                        "revision_id": pId
                                                    }
                                                }
                                            },
                                            fields="_id"
                                        )['hits']['hits'][0]['_id']
                                        es.update({
                                            'doc': {
                                                'fields': {
                                                    'timestamp':
                                                    aux['fTimestamp']
                                                }
                                            },
                                            'docid': doc_id,
                                            'type': "ds"
                                        })
                                    except IndexError:
                                        self.logger.warning(
                                            '[ENGINE COMMAND] revision id %s no existe en indexador, posiblemente no este publicado'
                                        )
                        except ValueError:
                            self.logger.error(
                                '[ENGINE COMMAND] ret no es un json')

                    return ret, mimetype

            raise IOError('Error code %d at %s+%s' %
                          (response.getcode(), url, str(params)))