def migrate_data_references(self):
    """Migrate data references."""
    def map_reference(reference):
        """Map references to new IDs."""
        try:
            return self.id_mapping['data'][reference]
        except KeyError as error:
            self.missing_data.add(error.args[0])
            return None

    # Fix references in JSON documents in the second pass.
    for new_id in self.id_mapping['data'].values():
        data = Data.objects.get(pk=new_id)

        for field_schema, fields in iterate_fields(data.input, data.process.input_schema):
            if 'type' not in field_schema:
                continue

            name = field_schema['name']
            value = fields[name]
            if field_schema['type'].startswith('data:'):
                fields[name] = map_reference(value)
            elif field_schema['type'].startswith('list:data:'):
                fields[name] = [map_reference(reference) for reference in value]

        data.save()
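
# Illustrative sketch only (the concrete key and value below are invented):
# ``map_reference`` above expects ``self.id_mapping['data']`` to map old data
# IDs, collected during the first migration pass, to the primary keys of the
# newly created Data objects, while unresolved IDs are collected in
# ``self.missing_data`` for later reporting.
#
#   self.id_mapping = {'data': {'52f4a8b1e180d7a6d2a7a9c3': 1841}}
#   self.missing_data = set()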
def dependency_status(data):
    """Return abstracted status of dependencies.

    STATUS_ERROR .. one dependency has error status
    STATUS_DONE .. all dependencies have done status
    None .. other

    """
    for field_schema, fields in iterate_fields(data.input, data.process.input_schema):
        if (field_schema['type'].lower().startswith('data:') or
                field_schema['type'].lower().startswith('list:data:')):
            name = field_schema['name']
            value = fields[name]

            if field_schema['type'].lower().startswith('data:'):
                value = [value]

            for uid in value:
                try:
                    _data = Data.objects.get(id=uid)
                except Data.DoesNotExist:
                    return Data.STATUS_ERROR

                if _data.status == Data.STATUS_ERROR:
                    return Data.STATUS_ERROR

                if _data.status != Data.STATUS_DONE:
                    return None

    return Data.STATUS_DONE
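
# A minimal usage sketch (hypothetical, not part of the original code):
# dependency_status() can be used to decide whether a Data object is ready to
# be executed, has to be marked as failed, or should keep waiting.
def collect_ready_data_example():
    ready = []
    for data in Data.objects.filter(status=Data.STATUS_RESOLVING):
        status = dependency_status(data)
        if status == Data.STATUS_ERROR:
            # A dependency failed or is missing, so this object cannot run.
            data.status = Data.STATUS_ERROR
            data.save()
        elif status == Data.STATUS_DONE:
            # All dependencies finished successfully; schedule for execution.
            ready.append(data)
        # ``None`` means some dependency is still running; check again later.
    return ready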
def get_purge_files(root, output, output_schema, descriptor, descriptor_schema):
    """Return paths under ``root`` that are not referenced by the given output or descriptor fields."""
    def remove_file(fn, paths):
        """From paths remove fn and dirs before fn in dir tree."""
        while fn:
            for i in range(len(paths) - 1, -1, -1):
                if fn == paths[i]:
                    paths.pop(i)
            fn, _ = os.path.split(fn)

    def remove_tree(fn, paths):
        """From paths remove fn and dirs before or after fn in dir tree."""
        for i in range(len(paths) - 1, -1, -1):
            head = paths[i]
            while head:
                if fn == head:
                    paths.pop(i)
                    break
                head, _ = os.path.split(head)

        remove_file(fn, paths)

    def subfiles(root):
        """Return a list of all subdirectories and files under the top directory."""
        subs = []
        for path, dirs, files in os.walk(root, topdown=False):
            path = path[len(root) + 1:]
            subs.extend(os.path.join(path, f) for f in files)
            subs.extend(os.path.join(path, d) for d in dirs)
        return subs

    unreferenced_files = subfiles(root)

    remove_file('jsonout.txt', unreferenced_files)
    remove_file('stderr.txt', unreferenced_files)
    remove_file('stdout.txt', unreferenced_files)

    meta_fields = [
        [output, output_schema],
        [descriptor, descriptor_schema],
    ]

    for meta_field, meta_field_schema in meta_fields:
        for field_schema, fields in iterate_fields(meta_field, meta_field_schema):
            if 'type' in field_schema:
                field_type = field_schema['type']
                field_name = field_schema['name']

                # Remove basic:file: entries
                if field_type.startswith('basic:file:'):
                    remove_file(fields[field_name]['file'], unreferenced_files)

                # Remove list:basic:file: entries
                elif field_type.startswith('list:basic:file:'):
                    for field in fields[field_name]:
                        remove_file(field['file'], unreferenced_files)

                # Remove basic:dir: entries
                elif field_type.startswith('basic:dir:'):
                    remove_tree(fields[field_name]['dir'], unreferenced_files)

                # Remove list:basic:dir: entries
                elif field_type.startswith('list:basic:dir:'):
                    for field in fields[field_name]:
                        remove_tree(field['dir'], unreferenced_files)

                # Remove refs entries
                if field_type.startswith('basic:file:') or field_type.startswith('basic:dir:'):
                    for ref in fields[field_name].get('refs', []):
                        remove_tree(ref, unreferenced_files)

                elif field_type.startswith('list:basic:file:') or field_type.startswith('list:basic:dir:'):
                    for field in fields[field_name]:
                        for ref in field.get('refs', []):
                            remove_tree(ref, unreferenced_files)

    return set([os.path.join(root, filename) for filename in unreferenced_files])
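
# Hypothetical usage sketch (the surrounding names are assumptions, not part
# of the original code): collect everything under a Data object's directory
# that its output and descriptor fields no longer reference, then delete the
# leftover plain files. Directories returned by get_purge_files() are simply
# skipped here.
def purge_data_directory_example(data, data_dir):
    root = os.path.join(data_dir, str(data.pk))
    unreferenced = get_purge_files(
        root,
        data.output, data.process.output_schema,
        data.descriptor, data.descriptor_schema.schema,
    )
    for path in unreferenced:
        if os.path.isfile(path):
            os.remove(path)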
def run_process(self, process_slug, input_={}, assert_status=Data.STATUS_DONE,
                descriptor=None, descriptor_schema=None, run_manager=True,
                verbosity=0):
    """Run the specified process with the given inputs.

    If input is a file, file path should be given relative to the
    ``tests/files`` directory of a Django application.
    If ``assert_status`` is given, check if
    :class:`~resolwe.flow.models.Data` object's status matches it after
    the process has finished.

    :param str process_slug: slug of the
        :class:`~resolwe.flow.models.Process` to run

    :param dict ``input_``: :class:`~resolwe.flow.models.Process`'s
        input parameters

        .. note::

            You don't have to specify parameters with defined default
            values.

    :param str ``assert_status``: desired status of the
        :class:`~resolwe.flow.models.Data` object

    :param dict descriptor: descriptor to set on the
        :class:`~resolwe.flow.models.Data` object

    :param dict descriptor_schema: descriptor schema to set on the
        :class:`~resolwe.flow.models.Data` object

    :return: object created by :class:`~resolwe.flow.models.Process`
    :rtype: ~resolwe.flow.models.Data

    """
    # backward compatibility
    process_slug = slugify(process_slug.replace(':', '-'))

    process = Process.objects.filter(slug=process_slug).order_by('-version').first()

    def mock_upload(file_path):
        """Mock file upload."""
        old_path = os.path.join(self.files_path, file_path)
        if not os.path.isfile(old_path):
            raise RuntimeError('Missing file: {}'.format(old_path))

        new_path = os.path.join(self.upload_dir, file_path)
        # create directories needed by new_path
        new_path_dir = os.path.dirname(new_path)
        if not os.path.exists(new_path_dir):
            os.makedirs(new_path_dir)

        shutil.copy2(old_path, new_path)
        self._upload_files.append(new_path)
        return {
            'file': file_path,
            'file_temp': file_path,
        }

    for field_schema, fields in iterate_fields(input_, process.input_schema):
        # copy referenced files to upload dir
        if field_schema['type'] == "basic:file:":
            fields[field_schema['name']] = mock_upload(fields[field_schema['name']])
        elif field_schema['type'] == "list:basic:file:":
            file_list = [mock_upload(file_path) for file_path in fields[field_schema['name']]]
            fields[field_schema['name']] = file_list

        # data references are passed through unchanged (primary keys are no
        # longer converted to strings here)
        if field_schema['type'].startswith('data:'):
            fields[field_schema['name']] = fields[field_schema['name']]
        if field_schema['type'].startswith('list:data:'):
            fields[field_schema['name']] = [obj for obj in fields[field_schema['name']]]

    data = Data.objects.create(
        input=input_,
        contributor=self.admin,
        process=process,
        slug=get_random_string(length=6),
        descriptor_schema=descriptor_schema,
        descriptor=descriptor or {})

    self.collection.data.add(data)

    if run_manager:
        manager.communicate(run_sync=True, verbosity=verbosity)

    # Fetch latest Data object from database
    data = Data.objects.get(pk=data.pk)

    if not run_manager and assert_status == Data.STATUS_DONE:
        assert_status = Data.STATUS_RESOLVING

    if assert_status:
        self.assertStatus(data, assert_status)

    return data
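
# Hypothetical usage sketch: the process slug, input names, file names and
# output fields below are invented for illustration. Inside a test case based
# on the class that defines run_process() above, a process can be run and the
# resulting Data object inspected.
def test_alignment_example(self):
    aligned = self.run_process('alignment-example', {
        'genome': 'genome.fasta',   # basic:file: input, relative to tests/files
        'reads': ['reads.fastq'],   # list:basic:file: input
    })
    # run_process() has already asserted STATUS_DONE; further checks can look
    # at the output fields of the returned Data object.
    self.assertEqual(aligned.output['bam']['file'], 'reads.bam')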
def migrate_data(self, data):
    """Migrate data."""
    contributor = self.get_contributor(data[u'author_id'])

    # DESCRIPTOR SCHEMA ############################################
    ds_fields = []
    ds_fields.extend(data.get(u'static_schema', []))
    ds_fields.extend(data.get(u'var_template', []))
    ds_fields.sort(key=lambda d: d[u'name'])
    ds_fields_dumped = json.dumps(ds_fields)

    if ds_fields_dumped in self.descriptor_schema_index:
        descriptor_schema = self.descriptor_schema_index[ds_fields_dumped]
    else:
        descriptor_schema = DescriptorSchema(schema=ds_fields)
        descriptor_schema.name = 'data_{}_descriptor'.format(data[u'_id'])
        descriptor_schema.contributor = contributor
        descriptor_schema.save()

        self.descriptor_schema_index[ds_fields_dumped] = descriptor_schema

    descriptor = {}
    descriptor.update(data.get(u'static', {}))
    descriptor.update(data.get(u'var', {}))

    # PROCESS ######################################################
    if u'processor_version' not in data:
        data[u'processor_version'] = '0.0.0'

    process_slug = self.process_slug(data[u'processor_name'])
    process_version = data[u'processor_version']
    try:
        process = Process.objects.get(slug=process_slug, version=process_version)
    except Process.DoesNotExist:
        latest = Process.objects.filter(slug=process_slug).order_by('-version').first()

        if latest:
            process = Process()
            process.name = latest.name
            process.slug = latest.slug
            process.category = latest.category
            process.description = latest.description
            process.contributor = latest.contributor

            process.version = process_version
            process.type = data[u'type']
            process.output_schema = data[u'output_schema']
            process.input_schema = data.get(u'input_schema', {})
            process.persistence = self.persistence_dict[data[u'persistence']]

            process.run['script'] = 'gen-require common\ngen-error "Deprecated process, use the latest version."'  # noqa pylint: disable=unsubscriptable-object

            # XXX
            # process.created =
            # process.modified =

            process.save()

            # copy permissions from latest process
            for user, perms in six.iteritems(get_users_with_perms(latest, attach_perms=True)):
                for perm in perms:
                    assign_perm(perm, user, process)
            for group, perms in six.iteritems(get_groups_with_perms(latest, attach_perms=True)):
                for perm in perms:
                    assign_perm(perm, group, process)
        else:
            # Create dummy processor if there is no other version
            dummy_name = 'Dummy processor of type {}'.format(data[u'type'])
            try:
                process = Process.objects.get(name=dummy_name)
            except Process.DoesNotExist:
                process = Process.objects.create(
                    name=dummy_name,
                    slug='non-existent',
                    contributor=get_user_model().objects.filter(is_superuser=True).first(),
                    type=data[u'type'],
                    category='data:non-existent',
                    run={'script': 'gen-require common\ngen-error "This processor is not intended to be run."'},
                )

    # DATA #########################################################
    new = Data()
    new.name = data.get(u'static', {}).get(u'name', '')
    if len(new.name) > 100:
        self.long_names.append(new.name)
        new.name = new.name[:97] + '...'

    new.status = self.status_dict[data[u'status']]
    new.process = process
    new.contributor = contributor
    new.input = data[u'input'] if u'input' in data else {}
    new.output = data[u'output']
    new.descriptor_schema = descriptor_schema
    new.descriptor = descriptor
    new.checksum = data.get(u'checksum', '')
    # XXX: Django will change this on create
    new.created = data[u'date_created']
    # XXX: Django will change this on save
    new.modified = data[u'date_modified']
    if u'date_start' in data and u'date_finish' in data:
        new.started = data[u'date_start']
        new.finished = data[u'date_finish']
    elif u'date_finish' in data:
        new.started = data[u'date_finish']
        new.finished = data[u'date_finish']
    elif u'date_start' in data:
        new.started = data[u'date_start']
        new.finished = data[u'date_start']
    else:
        new.started = datetime.fromtimestamp(0)
        new.finished = datetime.fromtimestamp(0)
    new.save()

    for case_id in data[u'case_ids']:
        try:
            collection = Collection.objects.get(pk=self.id_mapping[u'collection'][str(case_id)])
        except KeyError:
            self.missing_collections.add(str(case_id))
            continue

        collection.data.add(new)

    for field_schema, fields, path in iterate_fields(data[u'output'], data[u'output_schema'], ''):
        if 'type' in field_schema and field_schema['type'].startswith('basic:json:'):
            self.storage_index[fields[field_schema['name']]] = {
                'id': new.pk,
                'path': path,
            }

    self.migrate_permissions(new, data)

    self.id_mapping['data'][str(data[u'_id'])] = new.pk

    # DESCRIPTOR SCHEMA PERMISSIONS ################################
    for user in get_users_with_perms(new):
        assign_perm('view_descriptorschema', user, obj=descriptor_schema)

    for group in get_groups_with_perms(new):
        assign_perm('view_descriptorschema', group, obj=descriptor_schema)
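
# Illustrative sketch only: an abridged example of the legacy (MongoDB-style)
# ``data`` document that migrate_data() above consumes. All values are
# invented; only the key names come from the code.
#
#   data = {
#       u'_id': u'52f4a8b1e180d7a6d2a7a9c3',
#       u'author_id': 7,
#       u'processor_name': u'alignment:bowtie',
#       u'processor_version': u'1.0.0',
#       u'type': u'data:alignment:bam:',
#       u'persistence': u'RAW',
#       u'status': u'done',
#       u'input': {u'reads': 123},
#       u'output': {u'bam': {u'file': u'reads.bam'}},
#       u'output_schema': [{u'name': u'bam', u'type': u'basic:file:'}],
#       u'static': {u'name': u'Sample alignment'},
#       u'var': {},
#       u'case_ids': [u'52f4a8b1e180d7a6d2a7a9c4'],
#       u'date_created': u'2014-01-01T00:00:00',
#       u'date_modified': u'2014-01-02T00:00:00',
#   }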
def run(self, data_id, script, verbosity=1):
    """Execute the script and save results."""
    if verbosity >= 1:
        print('RUN: {} {}'.format(data_id, script))

    self.data_id = data_id

    data_dir = settings.FLOW_EXECUTOR['DATA_DIR']
    dir_mode = getattr(settings, 'FLOW_EXECUTOR', {}).get('DATA_DIR_MODE', 0o755)

    output_path = os.path.join(data_dir, str(data_id))

    os.mkdir(output_path)
    # os.mkdir is not guaranteed to set the given mode
    os.chmod(output_path, dir_mode)
    os.chdir(output_path)

    log_file = open('stdout.txt', 'w+')
    json_file = open('jsonout.txt', 'w+')

    proc_pid = self.start()

    self.update_data_status(
        status=Data.STATUS_PROCESSING,
        started=now(),
        process_pid=proc_pid
    )

    # Run processor and handle intermediate results
    self.run_script(script)
    spawn_processors = []
    output = {}
    process_error, process_warning, process_info = [], [], []
    process_progress, process_rc = 0, 0

    # read processor output
    try:
        stdout = self.get_stdout()
        while True:
            line = stdout.readline()
            if not line:
                break

            try:
                if line.strip().startswith('run'):
                    # Save processor and spawn if no errors
                    log_file.write(line)
                    log_file.flush()

                    for obj in iterjson(line[3:].strip()):
                        spawn_processors.append(obj)
                elif line.strip().startswith('export'):
                    file_name = line[6:].strip()

                    export_folder = settings.FLOW_EXECUTOR['UPLOAD_DIR']
                    unique_name = 'export_{}'.format(uuid.uuid4().hex)
                    export_path = os.path.join(export_folder, unique_name)

                    EXPORTED_FILES_MAPPER[file_name] = unique_name

                    shutil.move(file_name, export_path)
                else:
                    # If JSON, save to MongoDB
                    updates = {}
                    for obj in iterjson(line):
                        for key, val in six.iteritems(obj):
                            if key.startswith('proc.'):
                                if key == 'proc.error':
                                    process_error.append(val)
                                    if not process_rc:
                                        process_rc = 1
                                        updates['process_rc'] = process_rc
                                    updates['process_error'] = process_error
                                    updates['status'] = Data.STATUS_ERROR
                                elif key == 'proc.warning':
                                    process_warning.append(val)
                                    updates['process_warning'] = process_warning
                                elif key == 'proc.info':
                                    process_info.append(val)
                                    updates['process_info'] = process_info
                                elif key == 'proc.rc':
                                    process_rc = int(val)
                                    updates['process_rc'] = process_rc
                                    if process_rc != 0:
                                        updates['status'] = Data.STATUS_ERROR
                                elif key == 'proc.progress':
                                    process_progress = int(float(val) * 100)
                                    updates['process_progress'] = process_progress
                            else:
                                dict_dot(output, key, val)
                                updates['output'] = output

                    if updates:
                        updates['modified'] = now()
                        self.update_data_status(**updates)

                    if process_rc > 0:
                        log_file.close()
                        json_file.close()
                        os.chdir(CWD)
                        return

                    # Debug output
                    # Not referenced in Data object
                    json_file.write(line)
                    json_file.flush()
            except ValueError as ex:
                # Ignore if not JSON
                log_file.write(line)
                log_file.flush()
    except MemoryError as ex:
        logger.error(__("Out of memory: {}", ex))
    except IOError as ex:
        # TODO: if ex.errno == 28: no more free space
        raise ex
    finally:
        # Store results
        log_file.close()
        json_file.close()
        os.chdir(CWD)

    return_code = self.end()

    if process_rc < return_code:
        process_rc = return_code

    if spawn_processors and process_rc == 0:
        parent_data = Data.objects.get(pk=self.data_id)

        # Spawn processors
        for d in spawn_processors:
            d['contributor'] = parent_data.contributor
            d['process'] = Process.objects.filter(slug=d['process']).order_by('version').last()

            for field_schema, fields in iterate_fields(d.get('input', {}), d['process'].input_schema):
                type_ = field_schema['type']
                name = field_schema['name']
                value = fields[name]

                if type_ == 'basic:file:':
                    fields[name] = hydrate_spawned_files(value, data_id)
                elif type_ == 'list:basic:file:':
                    fields[name] = [hydrate_spawned_files(fn, data_id) for fn in value]

            with transaction.atomic():
                d = Data.objects.create(**d)
                for collection in parent_data.collection_set.all():
                    collection.data.add(d)

    if process_rc == 0:
        self.update_data_status(
            status=Data.STATUS_DONE,
            process_progress=100,
            finished=now()
        )
    else:
        self.update_data_status(
            status=Data.STATUS_ERROR,
            process_progress=100,
            process_rc=process_rc,
            finished=now()
        )

    try:
        # Cleanup after processor
        if data_id != 'no_data_id':
            data_purge(data_ids=[data_id], delete=True, verbosity=verbosity)
    except:  # pylint: disable=bare-except
        logger.error(__("Purge error:\n\n{}", traceback.format_exc()))
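
# Illustrative only: examples of processor stdout lines that the parsing loop
# in run() above understands. The concrete field names and values are made
# up; the three recognized shapes are ``run ...``, ``export ...`` and plain
# JSON objects.
#
#   run {"process": "some-child-process", "input": {"src": "reads.fastq"}}
#   export results.zip
#   {"proc.progress": "0.5", "proc.info": "Halfway done"}
#   {"output.bam": "reads.bam", "proc.rc": "0"}
#
# ``proc.*`` keys update the Data object's status fields, while any other key
# is written into its ``output`` dictionary via dict_dot().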
def run_process(self, process_slug, input_={}, assert_status=Data.STATUS_DONE,
                run_manager=True, verbosity=0):
    """Run the given processor with the specified inputs.

    If an input is a file, the file path should be given relative to the
    ``tests/files`` folder of a Django application. If ``assert_status`` is
    given, check if the Data object's status matches ``assert_status`` after
    the processor has finished.

    :param process_slug: slug of the processor to run
    :type process_slug: :obj:`str`

    :param ``input_``: Input parameters for the processor. You don't have
        to specify parameters for which default values are given.
    :type ``input_``: :obj:`dict`

    :param ``assert_status``: Desired status of the Data object
    :type ``assert_status``: :obj:`str`

    :return: :obj:`resolwe.flow.models.Data` object which is created by
        the processor.

    """
    # backward compatibility
    process_slug = slugify(process_slug.replace(':', '-'))

    p = Process.objects.get(slug=process_slug)

    for field_schema, fields in iterate_fields(input_, p.input_schema):
        # copy referenced files to upload dir
        if field_schema['type'] == "basic:file:":
            for app_config in apps.get_app_configs():
                old_path = os.path.join(app_config.path, 'tests', 'files',
                                        fields[field_schema['name']])
                if os.path.isfile(old_path):
                    file_name = os.path.basename(fields[field_schema['name']])
                    new_path = os.path.join(self.upload_path, file_name)
                    shutil.copy2(old_path, new_path)
                    self._upload_files.append(new_path)
                    # since we don't know what uid/gid will be used inside the
                    # Docker executor, we must give others read and write
                    # permissions
                    os.chmod(new_path, 0o666)
                    fields[field_schema['name']] = {
                        'file': file_name,
                        'file_temp': file_name,
                    }
                    break

        # convert primary keys to strings
        if field_schema['type'].startswith('data:'):
            fields[field_schema['name']] = str(fields[field_schema['name']])
        if field_schema['type'].startswith('list:data:'):
            fields[field_schema['name']] = [str(obj) for obj in fields[field_schema['name']]]

    d = Data.objects.create(
        input=input_,
        contributor=self.admin,
        process=p,
        slug=get_random_string(length=6))
    self.collection.data.add(d)

    if run_manager:
        manager.communicate(run_sync=True, verbosity=verbosity)

    # Fetch latest Data object from database
    d = Data.objects.get(pk=d.pk)

    if not run_manager and assert_status == Data.STATUS_DONE:
        assert_status = Data.STATUS_RESOLVING

    if assert_status:
        self.assertStatus(d, assert_status)

    return d