Ejemplo n.º 1
0
 def create_from_json(self, j, user=None):
     """Populate ``self.process`` from a DVAPQL script dictionary and save it.

     For ``DVAPQL.QUERY`` scripts a non-blank ``j['image_data_b64']`` is
     base64-decoded and replaced by ``None`` in ``j`` before ``j`` is stored
     as the process script. After the process is saved, the decoded bytes
     are handed to ``redis_client.set`` (``ex=1200``) and written to
     ``<MEDIA_ROOT>/queries/<uuid>.png``. When ``settings.ENABLE_CLOUDFS``
     is truthy the file is passed to ``fs.upload_file_to_remote`` and the
     local copy is deleted.

     :param j: script dictionary; must contain ``process_type`` and, for
         queries, ``image_data_b64``. It is mutated in place.
     :param user: optional user assigned to the process when not ``None``.
     :returns: the saved process object.
     :raises NotImplementedError: for ``DVAPQL.SCHEDULE`` scripts.
     :raises ValueError: for any other unrecognised ``process_type``.
     """
     if self.process is None:
         self.process = DVAPQL()
     if user is not None:
         self.process.user = user
     process_type = j['process_type']
     if process_type == DVAPQL.QUERY:
         image_data = None
         if j['image_data_b64'].strip():
             # b64decode replaces base64.decodestring, which is deprecated
             # and no longer exists on current Python 3 releases.
             image_data = base64.b64decode(j['image_data_b64'])
             j['image_data_b64'] = None
         self.process.process_type = DVAPQL.QUERY
         self.process.script = j
         # Save first: the paths below are built from self.process.uuid.
         self.process.save()
         if image_data:
             query_key = "/queries/{}.png".format(self.process.uuid)
             query_path = "{}/queries/{}.png".format(settings.MEDIA_ROOT, self.process.uuid)
             redis_client.set(query_key, image_data, ex=1200)
             # Binary mode: the payload is raw image bytes, text mode can
             # corrupt it (newline translation) or reject it outright.
             with open(query_path, 'wb') as fh:
                 fh.write(image_data)
             if settings.ENABLE_CLOUDFS:
                 fs.upload_file_to_remote(query_key)
                 os.remove(query_path)
     elif process_type == DVAPQL.PROCESS:
         self.process.process_type = DVAPQL.PROCESS
         self.process.script = j
         self.process.save()
     elif process_type == DVAPQL.SCHEDULE:
         raise NotImplementedError
     else:
         raise ValueError("Unsupported process_type: {!r}".format(process_type))
     return self.process
Ejemplo n.º 2
0
 def create_from_json(self, j, user=None):
     """Populate ``self.process`` from a DVAPQL script dictionary and save it.

     For ``DVAPQL.QUERY`` scripts a non-blank ``j['image_data_b64']`` is
     base64-decoded, attached to the process as ``image_data`` and replaced
     by ``None`` in ``j`` before ``j`` is stored as the process script. The
     process is then saved and ``self.store()`` is called.

     :param j: script dictionary; must contain ``process_type`` and, for
         queries, ``image_data_b64``. It is mutated in place.
     :param user: optional user assigned to the process when not ``None``.
     :returns: the saved process object.
     :raises NotImplementedError: for ``DVAPQL.INGEST`` scripts.
     :raises ValueError: for any other unrecognised ``process_type``.
     """
     if self.process is None:
         self.process = DVAPQL()
     if user is not None:
         self.process.user = user
     process_type = j['process_type']
     if process_type == DVAPQL.QUERY:
         if j['image_data_b64'].strip():
             # b64decode replaces base64.decodestring, which is deprecated
             # and no longer exists on current Python 3 releases.
             self.process.image_data = base64.b64decode(j['image_data_b64'])
             j['image_data_b64'] = None
         self.process.process_type = DVAPQL.QUERY
         self.process.script = j
         self.process.save()
         # NOTE(review): store() runs even when no image was supplied;
         # confirm it copes with a process that has no image_data.
         self.store()
     elif process_type == DVAPQL.PROCESS:
         self.process.process_type = DVAPQL.PROCESS
         self.process.script = j
         self.process.save()
     elif process_type == DVAPQL.INGEST:
         raise NotImplementedError
     else:
         raise ValueError("Unsupported process_type: {!r}".format(process_type))
     return self.process
Ejemplo n.º 3
0
class DVAPQLProcess(object):
    """Build a DVAPQL process from a JSON script, launch its tasks and wait on them."""

    def __init__(self, process=None, media_dir=None):
        """Remember the (optional) existing process and reset bookkeeping.

        :param process: existing process object, or ``None`` to have
            ``create_from_json`` create one.
        :param media_dir: stored as-is; not read by the methods shown here.
        """
        self.process = process
        self.media_dir = media_dir
        # TEvent primary key -> handle returned by app.send_task (queries only).
        self.task_results = {}
        # Model instances created while launching a PROCESS script.
        self.created_objects = []

    def store(self):
        """Write ``self.process.image_data`` to ``<MEDIA_ROOT>/queries/<pk>.png``.

        When ``settings.DISABLE_NFS`` is truthy the file is passed to
        ``fs.upload_file_to_remote`` and the local copy is deleted.
        """
        query_path = "{}/queries/{}.png".format(settings.MEDIA_ROOT,
                                                self.process.pk)
        # Binary mode: the payload is raw image bytes, text mode can corrupt
        # it (newline translation) or reject it outright.
        with open(query_path, 'wb') as fh:
            fh.write(self.process.image_data)
        if settings.DISABLE_NFS:
            query_key = "/queries/{}.png".format(self.process.pk)
            fs.upload_file_to_remote(query_key)
            os.remove(query_path)

    def create_from_json(self, j, user=None):
        """Populate ``self.process`` from a script dictionary and save it.

        For ``DVAPQL.QUERY`` scripts a non-blank ``j['image_data_b64']`` is
        base64-decoded, attached to the process as ``image_data`` and
        replaced by ``None`` in ``j`` before ``j`` is stored as the script;
        the process is then saved and ``store()`` is called.

        :param j: script dictionary; must contain ``process_type`` and, for
            queries, ``image_data_b64``. It is mutated in place.
        :param user: optional user assigned to the process when not ``None``.
        :returns: the saved process object.
        :raises NotImplementedError: for ``DVAPQL.INGEST`` scripts.
        :raises ValueError: for any other unrecognised ``process_type``.
        """
        if self.process is None:
            self.process = DVAPQL()
        if user is not None:
            self.process.user = user
        process_type = j['process_type']
        if process_type == DVAPQL.QUERY:
            if j['image_data_b64'].strip():
                # b64decode replaces the deprecated base64.decodestring.
                self.process.image_data = base64.b64decode(j['image_data_b64'])
                j['image_data_b64'] = None
            self.process.process_type = DVAPQL.QUERY
            self.process.script = j
            self.process.save()
            # NOTE(review): store() runs even when no image was supplied;
            # confirm image_data is always populated for query processes.
            self.store()
        elif process_type == DVAPQL.PROCESS:
            self.process.process_type = DVAPQL.PROCESS
            self.process.script = j
            self.process.save()
        elif process_type == DVAPQL.INGEST:
            raise NotImplementedError
        else:
            raise ValueError("Unsupported process_type: {!r}".format(process_type))
        return self.process

    def validate(self):
        """Placeholder; performs no validation."""
        pass

    def launch(self):
        """Create the requested model instances and dispatch the script's tasks.

        PROCESS scripts: every entry under ``create`` is instantiated through
        its ``dvaapp`` model (``'__timezone.now__'`` spec values are replaced
        by ``timezone.now()``), its own tasks are launched with the new
        instance's primary key, and finally the top-level ``tasks`` are
        launched. ``launch_task`` is not defined in this excerpt.

        QUERY scripts: one ``TEvent`` is created per task and sent through
        ``app.send_task``; the returned handle is kept in
        ``self.task_results`` keyed by the event's primary key.

        :raises NotImplementedError: for any other process type.
        """
        script = self.process.script
        if script['process_type'] == DVAPQL.PROCESS:
            for c in script.get('create', []):
                m = apps.get_model(app_label='dvaapp', model_name=c['MODEL'])
                # Work on a copy so the stored script keeps its placeholder
                # strings instead of receiving datetime objects.
                spec = dict(c['spec'])
                for k, v in c['spec'].items():
                    if v == '__timezone.now__':
                        spec[k] = timezone.now()
                instance = m.objects.create(**spec)
                self.created_objects.append(instance)
                for t in copy.deepcopy(c.get('tasks', [])):
                    self.launch_task(t, instance.pk)
            for t in script.get('tasks', []):
                self.launch_task(t)
        elif script['process_type'] == DVAPQL.QUERY:
            for t in script['tasks']:
                arguments = t.get('arguments', {})
                queue_name, operation = get_queue_name_and_operation(
                    t['operation'], arguments)
                next_task = TEvent.objects.create(parent_process=self.process,
                                                  operation=operation,
                                                  arguments=arguments,
                                                  queue=queue_name)
                self.task_results[next_task.pk] = app.send_task(
                    name=operation,
                    args=[next_task.pk],
                    queue=queue_name,
                    priority=5)
        else:
            raise NotImplementedError

    def wait(self, timeout=60):
        """Block until every launched task and its direct children finish.

        Each stored result is fetched with ``get(timeout=timeout)``; when it
        yields a non-empty iterable of task ids, each of those is awaited
        through ``AsyncResult`` as well.

        :param timeout: value forwarded to every ``get`` call.
        :raises ValueError: wrapping any exception raised while waiting.
        """
        for result in self.task_results.values():
            try:
                next_task_ids = result.get(timeout=timeout)
                if next_task_ids:
                    for next_task_id in next_task_ids:
                        AsyncResult(id=next_task_id).get(timeout=timeout)
            except Exception as e:
                raise ValueError(e)
Ejemplo n.º 4
0
class DVAPQLProcess(object):
    """Build a DVAPQL process from a JSON script, number its task groups and launch it."""

    def __init__(self, process=None, media_dir=None):
        """Remember the (optional) existing process and reset bookkeeping.

        :param process: existing process object, or ``None`` to have
            ``create_from_json`` create one.
        :param media_dir: stored as-is; not read by the methods shown here.
        """
        self.process = process
        self.media_dir = media_dir
        # TEvent primary key -> handle returned by app.send_task (queries only).
        self.task_results = {}
        # Model instances created by create_instances().
        self.created_objects = []
        # Next task group id to hand out.
        self.task_group_index = 0
        # task_group_name -> task group id.
        self.task_group_name_to_index = {}
        # task group id -> parent task group id.
        self.parent_task_group_index = {}
        self.root_task = None

    def launch_root_task(self):
        """Placeholder; does nothing."""
        pass

    def create_from_json(self, j, user=None):
        """Populate ``self.process`` from a script dictionary and save it.

        For ``DVAPQL.QUERY`` scripts a non-blank ``j['image_data_b64']`` is
        base64-decoded and replaced by ``None`` in ``j`` before ``j`` is
        stored as the script. After saving, the bytes are handed to
        ``redis_client.set`` (``ex=1200``) and written to
        ``<MEDIA_ROOT>/queries/<uuid>.png``; when ``settings.ENABLE_CLOUDFS``
        is truthy the file is uploaded via ``fs.upload_file_to_remote`` and
        the local copy is deleted.

        :param j: script dictionary; must contain ``process_type`` and, for
            queries, ``image_data_b64``. It is mutated in place.
        :param user: optional user assigned to the process when not ``None``.
        :returns: the saved process object.
        :raises NotImplementedError: for ``DVAPQL.SCHEDULE`` scripts.
        :raises ValueError: for any other unrecognised ``process_type``.
        """
        if self.process is None:
            self.process = DVAPQL()
        if user is not None:
            self.process.user = user
        process_type = j['process_type']
        if process_type == DVAPQL.QUERY:
            image_data = None
            if j['image_data_b64'].strip():
                # b64decode replaces the deprecated base64.decodestring.
                image_data = base64.b64decode(j['image_data_b64'])
                j['image_data_b64'] = None
            self.process.process_type = DVAPQL.QUERY
            self.process.script = j
            # Save first: the paths below are built from self.process.uuid.
            self.process.save()
            if image_data:
                query_key = "/queries/{}.png".format(self.process.uuid)
                query_path = "{}/queries/{}.png".format(settings.MEDIA_ROOT, self.process.uuid)
                redis_client.set(query_key, image_data, ex=1200)
                # Binary mode: the payload is raw image bytes.
                with open(query_path, 'wb') as fh:
                    fh.write(image_data)
                if settings.ENABLE_CLOUDFS:
                    fs.upload_file_to_remote(query_key)
                    os.remove(query_path)
        elif process_type == DVAPQL.PROCESS:
            self.process.process_type = DVAPQL.PROCESS
            self.process.script = j
            self.process.save()
        elif process_type == DVAPQL.SCHEDULE:
            raise NotImplementedError
        else:
            raise ValueError("Unsupported process_type: {!r}".format(process_type))
        return self.process

    def validate(self):
        """Placeholder; performs no validation."""
        pass

    def assign_task_group_id(self, tasks, parent_group_index=None):
        """Recursively number ``tasks`` and record their names and parents.

        Every task dict receives a fresh ``task_group_id``. Nested task lists
        under ``arguments['map']`` and ``arguments['reduce']`` are numbered
        depth-first with the enclosing task as their parent. A repeated
        ``task_group_name`` marks ``self.process`` as failed.

        :param tasks: list of task dictionaries; mutated in place.
        :param parent_group_index: group id of the enclosing task, or
            ``None`` when the tasks have no parent.
        """
        for t in tasks:
            # Tasks without an 'arguments' entry are treated as having none.
            arguments = t.get('arguments', {})
            group_id = self.task_group_index
            t['task_group_id'] = group_id
            self.task_group_index += 1
            # Compare against None explicitly: 0 is a valid parent group id.
            if parent_group_index is not None:
                self.parent_task_group_index[group_id] = parent_group_index
            task_group_name = arguments.get('task_group_name', None)
            if task_group_name:
                if task_group_name in self.task_group_name_to_index:
                    self.process.failed = True
                    self.process.error_message = "Repeated task group name."
                else:
                    self.task_group_name_to_index[task_group_name] = group_id
            if 'map' in arguments:
                self.assign_task_group_id(arguments['map'], group_id)
            if 'reduce' in arguments:
                self.assign_task_group_id(arguments['reduce'], group_id)

    def launch(self):
        """Launch the process according to its script and save it.

        PROCESS scripts run deletions, create the root task, create
        instances, launch the processing tasks and start the monitor.
        QUERY scripts launch the query tasks. Afterwards the name and parent
        task-group maps are copied into the script and the process is saved.

        :raises NotImplementedError: for any other process type.
        """
        process_type = self.process.script['process_type']
        if process_type == DVAPQL.PROCESS:
            self.delete_instances()
            self.create_root_task()
            self.create_instances()
            self.launch_processing_tasks()
            self.launch_process_monitor()
        elif process_type == DVAPQL.QUERY:
            self.launch_query_tasks()
        else:
            raise NotImplementedError
        self.process.script['task_group_name_to_index'] = self.task_group_name_to_index
        self.process.script['parent_task_group_index'] = self.parent_task_group_index
        self.process.save()

    def delete_instances(self):
        """Apply the script's ``delete`` entries.

        Only ``Video`` entries are deleted (a ``DeletedVideo`` row is created
        first); any other model marks the process as failed.
        """
        for d in self.process.script.get('delete', []):
            if d['MODEL'] == 'Video':
                d_copy = copy.deepcopy(d)
                m = apps.get_model(app_label='dvaapp', model_name=d['MODEL'])
                instance = m.objects.get(**d_copy['selector'])
                DeletedVideo.objects.create(deleter=self.process.user, video_uuid=instance.pk)
                instance.delete()
            else:
                self.process.failed = True
                self.process.error_message = "Cannot delete {}; Only video deletion implemented.".format(d['MODEL'])

    def create_instances(self):
        """Create a model instance for every ``create`` entry in the script.

        ``'__timezone.now__'`` spec values are replaced by ``timezone.now()``
        on a deep copy of the entry. Every model other than ``Video`` and
        ``TrainingSet`` also receives ``event_id`` set to the root task's
        primary key, so ``create_root_task`` must have run first.
        """
        for c in self.process.script.get('create', []):
            c_copy = copy.deepcopy(c)
            m = apps.get_model(app_label='dvaapp', model_name=c['MODEL'])
            for k, v in c['spec'].items():
                if v == '__timezone.now__':
                    c_copy['spec'][k] = timezone.now()
            if c['MODEL'] not in ('Video', 'TrainingSet'):
                c_copy['spec']['event_id'] = self.root_task.pk
            instance = m.objects.create(**c_copy['spec'])
            self.created_objects.append(instance)

    def create_root_task(self):
        """Create the already-completed ``perform_launch`` root ``TEvent``.

        The event takes the current task group index, which is then advanced.
        """
        self.root_task = TEvent.objects.create(operation="perform_launch", task_group_id=self.task_group_index,
                                               completed=True, started=True,
                                               parent_process_id=self.process.pk, queue="sync")
        self.task_group_index += 1

    def launch_processing_tasks(self):
        """Number and launch the script's top-level ``map`` and ``reduce`` tasks.

        Both lists are numbered with parent group 0. ``launch_task`` is not
        defined in this excerpt.

        :raises ValueError: when a reduce task carries its own ``operation``.
        """
        map_tasks = self.process.script.get('map', [])
        self.assign_task_group_id(map_tasks, 0)
        for t in map_tasks:
            self.launch_task(t)
        reduce_tasks = self.process.script.get('reduce', [])
        self.assign_task_group_id(reduce_tasks, 0)
        for t in reduce_tasks:
            if 'operation' in t:
                raise ValueError('{} is not a valid reduce operation, reduce tasks should not have an operation'.format(
                    t['operation']))
            t['operation'] = 'perform_reduce'
            self.launch_task(t)

    def launch_query_tasks(self):
        """Number the query's ``map`` tasks, create their events and send them.

        The handle returned by ``app.send_task`` is stored in
        ``self.task_results`` keyed by the event's primary key.
        """
        self.assign_task_group_id(self.process.script.get('map', []))
        for t in self.process.script['map']:
            arguments = t.get('arguments', {})
            queue_name, operation = get_queue_name_and_operation(t['operation'], arguments)
            next_task = TEvent.objects.create(parent_process=self.process, operation=operation, arguments=arguments,
                                              queue=queue_name, task_group_id=t['task_group_id'])
            self.task_results[next_task.pk] = app.send_task(name=operation, args=[next_task.pk], queue=queue_name,
                                                            priority=5)

    def launch_process_monitor(self):
        """Create and send the ``perform_process_monitoring`` event on ``settings.Q_REDUCER``."""
        monitoring_task = TEvent.objects.create(operation="perform_process_monitoring", arguments={}, parent=None,
                                                task_group_id=-1, parent_process=self.process,
                                                queue=settings.Q_REDUCER)
        app.send_task(name=monitoring_task.operation, args=[monitoring_task.pk],
                      queue=monitoring_task.queue)

    def wait_query(self, timeout=60):
        """Block until every query task and all tasks it spawned have finished.

        Each stored result is fetched with ``get(timeout=timeout)``. When a
        result is a list it is treated as ids of follow-up tasks, each of
        which is awaited through ``AsyncResult`` in turn; any other result
        ends that chain.

        :param timeout: value forwarded to every ``get`` call.
        :raises ValueError: when the process is not a query, or wrapping any
            exception raised while waiting.
        """
        if self.process.process_type != DVAPQL.QUERY:
            raise ValueError("wait query is only supported by Query processes")
        for result in self.task_results.values():
            try:
                children = result.get(timeout=timeout)
                # Explicit work list: every spawned task is followed, and a
                # non-list result can no longer spin the loop forever.
                pending = list(children) if isinstance(children, list) else []
                while pending:
                    next_task_id = pending.pop(0)
                    children = AsyncResult(id=next_task_id).get(timeout=timeout)
                    if isinstance(children, list):
                        pending.extend(children)
            except Exception as e:
                raise ValueError(e)
Ejemplo n.º 5
0
class DVAPQLProcess(object):
    """Build a DVAPQL process from a JSON script, number its task groups and launch it."""

    def __init__(self, process=None, media_dir=None):
        """Remember the (optional) existing process and reset bookkeeping.

        :param process: existing process object, or ``None`` to have
            ``create_from_json`` create one.
        :param media_dir: stored as-is; not read by the methods shown here.
        """
        self.process = process
        self.media_dir = media_dir
        # TEvent primary key -> handle returned by app.send_task (queries only).
        self.task_results = {}
        # Model instances created while launching a PROCESS script.
        self.created_objects = []
        # Next task group id to hand out.
        self.task_group_index = 0

    def create_from_json(self, j, user=None):
        """Populate ``self.process`` from a script dictionary and save it.

        For ``DVAPQL.QUERY`` scripts a non-blank ``j['image_data_b64']`` is
        base64-decoded and replaced by ``None`` in ``j`` before ``j`` is
        stored as the script. After saving, the bytes are handed to
        ``redis_client.set`` (``ex=1200``) and written to
        ``<MEDIA_ROOT>/queries/<uuid>.png``; when ``settings.DISABLE_NFS``
        is truthy the file is uploaded via ``fs.upload_file_to_remote`` and
        the local copy is deleted.

        :param j: script dictionary; must contain ``process_type`` and, for
            queries, ``image_data_b64``. It is mutated in place.
        :param user: optional user assigned to the process when not ``None``.
        :returns: the saved process object.
        :raises NotImplementedError: for ``DVAPQL.SCHEDULE`` scripts.
        :raises ValueError: for any other unrecognised ``process_type``.
        """
        if self.process is None:
            self.process = DVAPQL()
        if user is not None:
            self.process.user = user
        process_type = j['process_type']
        if process_type == DVAPQL.QUERY:
            image_data = None
            if j['image_data_b64'].strip():
                # b64decode replaces the deprecated base64.decodestring.
                image_data = base64.b64decode(j['image_data_b64'])
                j['image_data_b64'] = None
            self.process.process_type = DVAPQL.QUERY
            self.process.script = j
            # Save first: the paths below are built from self.process.uuid.
            self.process.save()
            if image_data:
                query_key = "/queries/{}.png".format(self.process.uuid)
                query_path = "{}/queries/{}.png".format(
                    settings.MEDIA_ROOT, self.process.uuid)
                redis_client.set(query_key, image_data, ex=1200)
                # Binary mode: the payload is raw image bytes.
                with open(query_path, 'wb') as fh:
                    fh.write(image_data)
                if settings.DISABLE_NFS:
                    fs.upload_file_to_remote(query_key)
                    os.remove(query_path)
        elif process_type == DVAPQL.PROCESS:
            self.process.process_type = DVAPQL.PROCESS
            self.process.script = j
            self.process.save()
        elif process_type == DVAPQL.SCHEDULE:
            raise NotImplementedError
        else:
            raise ValueError("Unsupported process_type: {!r}".format(process_type))
        return self.process

    def validate(self):
        """Placeholder; performs no validation."""
        pass

    def assign_task_group_id(self, tasks):
        """Recursively give every task dict a fresh ``task_group_id``.

        Nested task lists under ``arguments['map']`` and
        ``arguments['reduce']`` are numbered depth-first.

        :param tasks: list of task dictionaries; mutated in place.
        """
        for t in tasks:
            t['task_group_id'] = self.task_group_index
            self.task_group_index += 1
            arguments = t.get('arguments', {})
            if 'map' in arguments:
                self.assign_task_group_id(arguments['map'])
            if 'reduce' in arguments:
                self.assign_task_group_id(arguments['reduce'])

    def launch(self):
        """Apply deletions/creations, dispatch the script's tasks and save the process.

        PROCESS scripts: ``Video`` deletions are applied (other models mark
        the process as failed), each ``create`` entry is instantiated and
        its tasks launched with the new instance's primary key, then the
        top-level ``tasks`` are launched. ``launch_task`` is not defined in
        this excerpt.

        QUERY scripts: one ``TEvent`` is created per task and sent through
        ``app.send_task``; the handle is kept in ``self.task_results``.

        :raises NotImplementedError: for any other process type.
        """
        script = self.process.script
        if script['process_type'] == DVAPQL.PROCESS:
            for d in script.get('delete', []):
                if d['MODEL'] == 'Video':
                    d_copy = copy.deepcopy(d)
                    m = apps.get_model(app_label='dvaapp',
                                       model_name=d['MODEL'])
                    instance = m.objects.get(**d_copy['selector'])
                    DeletedVideo.objects.create(deleter=self.process.user,
                                                video_uuid=instance.pk)
                    instance.delete()
                else:
                    self.process.failed = True
                    self.process.error_message = "Cannot delete {}; Only video deletion implemented.".format(
                        d['MODEL'])
            # NOTE(review): the top-level tasks are numbered here and again
            # after the create loop, so the first set of ids is overwritten.
            # Kept as-is to preserve the resulting id sequence; confirm
            # whether this first call is intentional.
            self.assign_task_group_id(script.get('tasks', []))
            for c in script.get('create', []):
                c_copy = copy.deepcopy(c)
                m = apps.get_model(app_label='dvaapp', model_name=c['MODEL'])
                for k, v in c['spec'].items():
                    if v == '__timezone.now__':
                        c_copy['spec'][k] = timezone.now()
                instance = m.objects.create(**c_copy['spec'])
                self.created_objects.append(instance)
                self.assign_task_group_id(c.get('tasks', []))
                # Launch deep copies so launch_task cannot alter the script.
                for t in copy.deepcopy(c.get('tasks', [])):
                    self.launch_task(t, instance.pk)
            self.assign_task_group_id(script.get('tasks', []))
            for t in script.get('tasks', []):
                self.launch_task(t)
        elif script['process_type'] == DVAPQL.QUERY:
            for t in script['tasks']:
                arguments = t.get('arguments', {})
                queue_name, operation = get_queue_name_and_operation(
                    t['operation'], arguments)
                next_task = TEvent.objects.create(parent_process=self.process,
                                                  operation=operation,
                                                  arguments=arguments,
                                                  queue=queue_name)
                self.task_results[next_task.pk] = app.send_task(
                    name=operation,
                    args=[next_task.pk],
                    queue=queue_name,
                    priority=5)
        else:
            raise NotImplementedError
        self.process.save()

    def wait(self, timeout=60):
        """Block until every launched task and its direct children finish.

        Each stored result is fetched with ``get(timeout=timeout)``; when it
        yields a non-empty iterable of task ids, each of those is awaited
        through ``AsyncResult`` as well.

        :param timeout: value forwarded to every ``get`` call.
        :raises ValueError: wrapping any exception raised while waiting.
        """
        for result in self.task_results.values():
            try:
                next_task_ids = result.get(timeout=timeout)
                if next_task_ids:
                    for next_task_id in next_task_ids:
                        AsyncResult(id=next_task_id).get(timeout=timeout)
            except Exception as e:
                raise ValueError(e)