コード例 #1
0
ファイル: decorators.py プロジェクト: Kyeongrok/dms
 def inner_wrapper(*args, **kwds):
     # Run the wrapped callable and translate Elasticsearch failures into
     # the DMS client exception hierarchy.
     try:
         return func(*args, **kwds)
     except TransportError as transport_error:
         # A 409 status is reported as a conflict; every other transport
         # failure becomes a generic client exception.
         error_class = (DMSConflictError
                        if transport_error.status_code == 409
                        else DMSClientException)
         raise error_class.from_exception(transport_error)
     except ElasticsearchException as es_error:
         raise DMSClientException.from_exception(es_error)
コード例 #2
0
ファイル: __init__.py プロジェクト: Kyeongrok/dms
    def delete(self, doc_id):
        """
        Remove the document identified by ``doc_id``.

        The deletion is issued as a delete-by-query on the ``_id`` term,
        with ``refresh`` enabled.

        :param doc_id: the ID of the document to delete
        :type doc_id: string
        :raises dmsclient.exceptions.DMSDocumentNotFoundError: if the query matched no document
        :raises dmsclient.exceptions.DMSClientException: if the number of deleted documents is not exactly one
        """
        model = self.model_class
        id_query = {'query': {'term': {'_id': doc_id}}}

        outcome = self.client.elasticsearch.delete_by_query(
            index=model.TEMPLATE,
            doc_type=model.DOC_TYPE,
            body=id_query,
            params={'refresh': 'true'})

        # Nothing matched the ID at all.
        if outcome['total'] == 0:
            raise DMSDocumentNotFoundError("Could not find %s with ID '%s'" %
                                           (model.__name__, doc_id))

        # Something matched, but the deletion count is not the expected one.
        if outcome['deleted'] != 1:
            raise DMSClientException(
                "Unexpected error deleting %s with ID '%s': %s" %
                (model.__name__, doc_id, str(outcome)))
コード例 #3
0
ファイル: client.py プロジェクト: Kyeongrok/dms
    def verify_templates(self):
        """
        Check that every template named in ``MAPPINGS`` exists in Elasticsearch.

        Templates are checked one by one; the first missing template aborts
        the verification.

        :raises dmsclient.exceptions.DMSClientException: if a template does not exist in Elasticsearch
        """
        for name in MAPPINGS:
            logger.info("Verifying template '%s'..." % (name, ))
            if self.elasticsearch.indices.exists_template(name):
                continue
            raise DMSClientException(
                "Template '%s' does not exist in Elasticsearch" % (name, ))
コード例 #4
0
ファイル: client.py プロジェクト: Kyeongrok/dms
    def sync_cluster_config(self, force=False):
        """
        Refresh the hash ring from the cluster configuration stored in Elasticsearch.

        The refresh is skipped when the time elapsed since ``last_sync`` is
        still below ``SYNC_INTERVAL``, unless ``force`` is set. Only clusters
        flagged as available are placed on the ring.

        :param bool force: Synchronize regardless of the time elapsed since the last sync
        :raises dmsclient.exceptions.DMSClientException: if the clusters cannot be
            retrieved or none of them is available
        """
        if not force:
            # Only consult the last sync time when not forcing.
            elapsed = datetime.now() - self.last_sync
            if elapsed < self.SYNC_INTERVAL:
                return

        logger.debug('Syncing cluster configuration')

        try:
            nodes_ring = {
                cluster.cluster_id: {
                    'instance': cluster,
                    'weight': cluster.weight
                }
                for cluster in self.clusters.get_all()
                if cluster.available
            }
        except Exception as e:
            raise DMSClientException(
                "Could not obtain cluster configuration. %s" % (str(e), ))

        logger.debug('Obtained %d clusters from Elasticsearch' %
                     (len(nodes_ring), ))

        if not nodes_ring:
            raise DMSClientException(
                'There are no available cluster configured in Elasticsearch')

        self.hashring = HashRing(nodes=nodes_ring)
        self.last_sync = datetime.now()
コード例 #5
0
ファイル: __init__.py プロジェクト: Kyeongrok/dms
    def __update__(self, doc_id, fields):
        """
        Update fields for the document matching the given ID.

        WARNING: use this with caution as it will not prevent you from updating
        protected fields. To update or create new unprotected fields, use `set_fields`
        instead.

        An ``updated_at`` entry holding the current UTC time is always added
        to the update. The ``fields`` dict passed by the caller is not modified.

        :param doc_id: the document ID to be updated
        :type doc_id: string
        :param fields: fields to be updated
        :type fields: dict
        :raises dmsclient.exceptions.DMSDocumentNotFoundError: if document with the given ID does not exist
        :raises dmsclient.exceptions.DMSClientException: if any other error occur
        """
        # Work on a copy so the caller's dict does not silently gain an
        # 'updated_at' key.
        changes = dict(fields)
        changes['updated_at'] = datetime.utcnow()

        # Partial update via "doc" is not available in "update_by_query" call
        # https://github.com/elastic/elasticsearch/issues/20135
        # Field names are interpolated into the script source, so they must
        # be usable as painless identifiers; values travel as parameters.
        script = {
            "lang": "painless",
            "source": "".join(
                "ctx._source.{0} = params.{0}; ".format(field)
                for field in changes),
            "params": changes,
        }

        result = self.client.elasticsearch.update_by_query(
            index=self.model_class.TEMPLATE,
            doc_type=self.model_class.DOC_TYPE,
            body={
                "query": {
                    "term": {
                        "_id": doc_id
                    }
                },
                "script": script
            },
            params={'refresh': 'true'})

        if result['total'] == 0:
            raise DMSDocumentNotFoundError("Could not find %s with ID '%s'" %
                                           (self.model_class.__name__, doc_id))
        if result['updated'] != 1:
            raise DMSClientException(
                'Unexpected error updating %s with ID %s: %s' %
                (self.model_class.__name__, doc_id, str(result)))
コード例 #6
0
ファイル: manager.py プロジェクト: Kyeongrok/dms
    def __init__(self, config, mount_path, reader_id, cartridge_id):
        """
        Set up the manager for regular ingestion.

        Delegates to the base initializer, marks the cartridge with the
        ``INGESTION`` workflow type and, when ``self.check_mountpoints`` is
        truthy, verifies that the raw mount of every cluster in the hash ring
        is mounted.

        NOTE(review): ``self.check_mountpoints`` and ``self.client`` are not
        assigned here; presumably the base initializer provides them.

        :param config: configuration passed through to the base initializer
        :param mount_path: passed through to the base initializer
        :param reader_id: passed through to the base initializer
        :param cartridge_id: ID of the cartridge whose workflow type is set
        :raises dmsclient.exceptions.DMSClientException: if a raw mount is not mounted
        """
        super(RegularIngestManager, self).__init__(config, mount_path,
                                                   reader_id, cartridge_id)
        self.log = logging.getLogger('ingest.regular.manager')

        # set cartridge workflow type for regular ingestion
        self.set_cartridge_workflow_type(cartridge_id,
                                         Cartridge.WorkflowType.INGESTION)

        if self.check_mountpoints:
            self.log.info("Checking mount points")
            try:
                for cluster in self.client.hashring.get_instances():
                    if not util.is_mounted(cluster.raw_mount):
                        raise DMSClientException("'{}' is not mounted".format(
                            str(cluster.raw_mount)))
            except Exception as e:
                # The message needs a placeholder for the exception;
                # otherwise logging fails to format the record and the
                # error text is lost.
                self.log.error('Error checking the mount points: %s', e)
                raise
コード例 #7
0
ファイル: __init__.py プロジェクト: Kyeongrok/dms
    def remove_field(self, doc_id, field_key):
        """
        Drop a single unprotected field from the document with the given ID.

        :param doc_id: the document ID
        :type doc_id: string
        :param field_key: the key of the field to remove
        :type field_key: string
        :raises ValueError: if ``field_key`` is a protected field of the model
        :raises dmsclient.exceptions.DMSDocumentNotFoundError: if document with the given ID does not exist
        :raises dmsclient.exceptions.DMSClientException: if any other error occur
        """
        model = self.model_class

        # Protected fields are rejected before Elasticsearch is contacted.
        if field_key in model.PROTECTED_ATTRIBUTES:
            raise ValueError("'%s' is a protected field of %s." %
                             (field_key, model.__name__))

        # The key to remove is handed over as a script parameter.
        removal_script = {
            "source": "ctx._source.remove(params.field)",
            "params": {"field": field_key},
        }
        request_body = {
            "query": {"term": {"_id": doc_id}},
            "script": removal_script,
        }

        outcome = self.client.elasticsearch.update_by_query(
            index=model.TEMPLATE,
            doc_type=model.DOC_TYPE,
            body=request_body,
            params={'refresh': 'true'})

        if outcome['total'] == 0:
            raise DMSDocumentNotFoundError("Could not find %s with ID '%s'" %
                                           (model.__name__, doc_id))
        if outcome['updated'] != 1:
            raise DMSClientException(
                'Unexpected error updating %s with ID %s: %s' %
                (model.__name__, doc_id, str(outcome)))
コード例 #8
0
ファイル: __init__.py プロジェクト: Kyeongrok/dms
    def remove_tags(self, doc_id, tags):
        """
        Strip the given tags from the document with the given ID.

        The document's ``updated_at`` field is set to the current UTC time
        in the same scripted update.

        :param doc_id: the document ID
        :type doc_id: string
        :param tags: the list of tags to be removed
        :type tags: list
        :raises dmsclient.exceptions.DMSDocumentNotFoundError: if the query matched no document
        :raises dmsclient.exceptions.DMSClientException: if an error occurs
        """
        assert isinstance(tags, list)

        model = self.model_class
        tag_script = {
            "source":
            "ctx._source.tags.removeAll(params.tags); ctx._source.updated_at = params.updated_at;",
            "params": {
                "tags": tags,
                "updated_at": datetime.utcnow()
            }
        }
        request_body = {
            "query": {"term": {"_id": doc_id}},
            "script": tag_script,
        }

        outcome = self.client.elasticsearch.update_by_query(
            index=model.INDEX,
            doc_type=model.DOC_TYPE,
            body=request_body,
            params={'refresh': 'true'})

        if outcome['total'] == 0:
            raise DMSDocumentNotFoundError('Could not find %s with ID %s' %
                                           (model.__name__, doc_id))
        if outcome['updated'] != 1:
            raise DMSClientException(
                'Unexpected error updating %s with ID %s: %s' %
                (model.__name__, doc_id, str(outcome)))
コード例 #9
0
ファイル: client.py プロジェクト: Kyeongrok/dms
    def get_index(self, index_pattern, doc_type, **fields):
        """
        Look up the name of the index holding the single document that
        satisfies all the given filters.

        Each keyword argument becomes a ``match`` clause; all clauses are
        combined under ``bool``/``must``.

        :param string index_pattern: Index pattern to perform the search
        :param string doc_type: Document type to look for
        :param fields: Arguments that are used in the query as filters
        :return: The index name that matches the given filters
        :raises dmsclient.exceptions.DMSClientException: if the search does not
            return exactly one hit
        """
        filters = [{'match': {name: value}} for name, value in fields.items()]
        query = {'query': {'bool': {'must': filters}}}

        response = self.elasticsearch.search(index=index_pattern,
                                             doc_type=doc_type,
                                             body=query)
        hits = response['hits']['hits']

        # Any hit count other than exactly one is reported as "not found".
        if len(hits) != 1:
            raise DMSClientException('"found":false', 404)

        return hits[0]['_index']
コード例 #10
0
    def __init__(self, config, mount_path, reader_id, cartridge_id):
        """
        Set up the manager for sensor-mode operation.

        Delegates to the base initializer, reads the ingest/egest directories
        and the sensor type from the ``sensor_mode`` section of ``config``,
        and sets the cartridge workflow type to ``FLC_INGESTION`` or
        ``FLC_EGESTION`` depending on the ingest-mode check. When
        ``self.check_mountpoints`` is truthy, verifies that the perm and resim
        mounts of every cluster in the hash ring are mounted.

        NOTE(review): ``self.check_mountpoints`` and ``self.client`` are not
        assigned here; presumably the base initializer provides them.

        :param config: configuration mapping; must contain a ``sensor_mode`` section
        :param mount_path: passed through to the base initializer
        :param reader_id: passed through to the base initializer
        :param cartridge_id: ID of the cartridge whose workflow type is set
        :raises dmsclient.exceptions.DMSClientException: if a required mount is not mounted
        """
        super(SensorManager, self).__init__(config, mount_path, reader_id,
                                            cartridge_id)
        self.log = logging.getLogger('ingest.sensor.manager')
        self.ingest_dir = config['sensor_mode']['ingest_directory']
        self.egest_dir = config['sensor_mode']['egest_directory']
        self.sensor_type = config['sensor_mode']['sensor_type']

        # set cartridge workflow type based on the sensor mode
        workflow_type = Cartridge.WorkflowType.FLC_INGESTION \
            if self.__is_ingest_mode() else Cartridge.WorkflowType.FLC_EGESTION
        self.set_cartridge_workflow_type(cartridge_id, workflow_type)

        if self.check_mountpoints:
            self.log.info("Checking mount points")
            try:
                for cluster in self.client.hashring.get_instances():
                    for mount in [cluster.perm_mount, cluster.resim_mount]:
                        if not util.is_mounted(mount):
                            raise DMSClientException(
                                "'{}' is not mounted".format(str(mount)))
            except Exception as e:
                # The message needs a placeholder for the exception;
                # otherwise logging fails to format the record and the
                # error text is lost.
                self.log.error('Error checking the mount points: %s', e)
                raise
コード例 #11
0
ファイル: segment.py プロジェクト: Kyeongrok/dms
    def from_drive(cls, segment_id, sequence, drive, cluster, **kwargs):
        """
        Build an instance from a segment ID, its drive and its cluster.

        The segment ID is parsed with ``cls.INGEST_REGEX`` to obtain the car
        ID, the project name and the start/end timestamps (built as UTC
        datetimes). Car ID and project name must agree with the drive. All
        path, export and share locations are the corresponding cluster roots
        joined with ``<car_id>/<YYYYmm>/<ddTHHMMSS>/<sequence>``, where the
        date parts come from ``drive.logged_at`` and the sequence is
        zero-padded to four characters.

        :param segment_id: the segment ID to parse
        :param sequence: the sequence number of the segment within the drive
        :param drive: the drive the segment belongs to
        :param cluster: the cluster providing mounts, exports, shares and hosts
        :param kwargs: extra keyword arguments forwarded to the constructor
        :return: a new instance of ``cls``
        :raises DMSInvalidFormat: if ``segment_id`` does not match ``cls.INGEST_REGEX``
        :raises DMSClientException: if car ID or project name differ from the drive's
        """
        parsed = re.match(cls.INGEST_REGEX, segment_id)
        if parsed is None:
            raise DMSInvalidFormat("Could not parse segment_id '%s'" %
                                   (segment_id, ))

        def timestamp(prefix):
            # Assemble a UTC datetime from the '<prefix>_<unit>' groups.
            units = ('year', 'month', 'day', 'hour', 'minute', 'second')
            values = {
                unit: int(parsed.group('%s_%s' % (prefix, unit)))
                for unit in units
            }
            return datetime(tzinfo=timezone.utc, **values)

        car_id = parsed.group('car_id')
        project_name = parsed.group('project_name')
        started_at = timestamp('start')
        ended_at = timestamp('end')

        # The segment must belong to the same car and project as its drive.
        for label, found, expected in (
                ('car_id', car_id, drive.car_id),
                ('project_name', project_name, drive.project_name)):
            if found != expected:
                raise DMSClientException(
                    label + ' mismatch between drive and segment [%s!=%s]' %
                    (found, expected))

        logged_at = drive.logged_at
        relative_dir = os.path.join(drive.car_id,
                                    logged_at.strftime("%Y%m"),
                                    logged_at.strftime("%dT%H%M%S"),
                                    str(sequence).zfill(4))

        def under(root):
            # Location of this segment below one of the cluster roots.
            return os.path.join(root, relative_dir)

        return cls(segment_id=segment_id,
                   sequence=sequence,
                   drive_id=drive.drive_id,
                   project_name=project_name,
                   car_id=car_id,
                   cluster_id=cluster.cluster_id,
                   started_at=started_at,
                   ended_at=ended_at,
                   nfs_host=cluster.nfs_host,
                   smb_host=cluster.smb_host,
                   output_export=under(cluster.output_export),
                   perm_export=under(cluster.perm_export),
                   resim_export=under(cluster.resim_export),
                   perm_path=under(cluster.perm_mount),
                   output_path=under(cluster.output_mount),
                   resim_path=under(cluster.resim_mount),
                   perm_share=under(cluster.perm_share),
                   output_share=under(cluster.output_share),
                   resim_share=under(cluster.resim_share),
                   **kwargs)
コード例 #12
0
ファイル: worker.py プロジェクト: Kyeongrok/dms
    def process_drive(self, dir_name, source_path):
        """
        Ingest one drive directory: register it, copy its data and record the outcome.

        Steps, in order:

        1. Collect size and file count of ``source_path`` (falls back to
           zeros, with a warning, if that fails).
        2. Create the drive from the ingest information; if creation fails
           with status code 409 the existing drive is fetched and its
           ``source_path`` replaced, so that it gets re-ingested.
        3. If the drive's target path is not mounted and mount point checking
           is enabled in the configuration, mark the drive ``copy_failed``
           and abort.
        4. Mark the drive ``copying``, copy the data, then store state
           ``copied`` together with the accumulated ingest duration and the
           tool versions. Any failure marks the drive ``copy_failed`` and is
           re-raised.

        :param dir_name: name of the drive directory
        :param source_path: path the drive is read from
        :raises DMSClientException: if the drive cannot be created (other than
            a 409 status) or no mount point is found for the target path
        """
        started = time.time()

        try:
            total_size, file_count = util.get_dir_stats(source_path)
        except Exception as stats_error:
            self.log.warning(
                "[%s] Could not obtain drive size and file count: %s" %
                (self.getName(), stats_error))
            total_size = file_count = 0

        station = socket.gethostname()
        versions = {
            'ingest_version': ingest.__version__,
            'dmsclient_version': dmsclient.__version__,
        }

        drives = self.client.drives
        drive = None
        try:
            drive = drives.create_from_ingest(dir_name=dir_name,
                                              source_path=source_path,
                                              ingest_station=station,
                                              size=total_size,
                                              file_count=file_count,
                                              **versions)
        except DMSClientException as creation_error:
            # Status 409 is tolerated here; the existing drive is
            # looked up below.
            if creation_error.status_code != 409:
                raise creation_error

        if not drive:
            drive = drives.get(dir_name)
            # Need to update the source_path as it can be different from the source_path when it was created
            drive.source_path = source_path
            self.log.info(
                "Drive '{}' (state: '{}') already existed. Reingesting...".
                format(dir_name, drive.state))

        target = drive.target_path
        if not is_mounted(target) and \
                self.config['general']['check_mountpoints']:
            drives.set_state(drive.drive_id, 'copy_failed')
            raise DMSClientException(
                "No mount point found for path '{}'".format(drive.target_path))

        try:
            drives.set_state(drive.drive_id, 'copying')
            self.__copy_data(drive)
            # Add this run's elapsed time to the duration already stored
            # on the drive.
            elapsed = int(time.time() - started)
            ingest_duration = drive.ingest_duration + elapsed
            update = {'state': 'copied', 'ingest_duration': ingest_duration}
            update.update(versions)
            drives.__update__(drive.drive_id, update)
            self.log.info("Drive '{}' ingested successfully".format(dir_name))
        except Exception as copy_error:
            drives.set_state(drive.drive_id, 'copy_failed')
            self.log.error("Drive '%s' failed. Error: %s" %
                           (dir_name, copy_error))
            raise copy_error