    def post(self, request, *args, **kwargs):
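        """Handle an ISA-Tab import request submitted either as an uploaded
        file or as a URL, parse the archive into a new data set, and start
        import tasks for its uploaded or S3-hosted data files.
        """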
        form = ImportISATabFileForm(request.POST, request.FILES)

        if form.is_valid() or request.is_ajax():
            try:
                f = form.cleaned_data['isa_tab_file']
            except KeyError:
                f = None

            try:
                url = form.cleaned_data['isa_tab_url']
            except KeyError:
                url = None

            if url:
                response = self.import_by_url(url)
            else:
                try:
                    response = self.import_by_file(f)
                except Exception as e:
                    logger.error(traceback.format_exc())
                    return HttpResponseBadRequest("{} {}".format(
                        PARSER_UNEXPECTED_ERROR_MESSAGE, e))

            # get AWS Cognito identity ID
            if settings.REFINERY_DEPLOYMENT_PLATFORM == 'aws':
                # QueryDict.get() returns None for a missing key instead of
                # raising, so check for a missing or empty value explicitly
                identity_id = request.POST.get('identity_id')
                if not identity_id:
                    error_msg = 'identity_id is missing'
                    error = {'error_message': error_msg}
                    if request.is_ajax():
                        return HttpResponseBadRequest(
                            json.dumps({'error': error_msg}),
                            content_type='application/json')
                    else:
                        return render(request, self.template_name, error)
            else:
                identity_id = None

            if not response['success']:
                if request.is_ajax():
                    return HttpResponseBadRequest(
                        json.dumps({'error': response["message"]}),
                        content_type='application/json')
                return render(request, self.template_name,
                              {'form': form, 'error': response["message"]})

            logger.debug("Temp file name: '%s'",
                         response['data']['temp_file_path'])

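            # parse the ISA-Tab archive; on success parse_isatab() returns
            # the UUID of the newly created data set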
            try:
                parse_isatab_invocation = parse_isatab(
                    request.user.username,
                    False,
                    response['data']['temp_file_path'],
                    identity_id=identity_id)
            except ParserException as e:
                error_message = "{} {}".format(PARSER_ERROR_MESSAGE, e.message)
                logger.error(error_message)
                return HttpResponseBadRequest(error_message)
            except Exception as e:
                error_message = "{} {}".format(PARSER_UNEXPECTED_ERROR_MESSAGE,
                                               traceback.format_exc(e))
                logger.error(error_message)
                return HttpResponseBadRequest("{} {}".format(
                    PARSER_UNEXPECTED_ERROR_MESSAGE, e))
            else:
                dataset_uuid = parse_isatab_invocation

            try:
                os.unlink(response['data']['temp_file_path'])
            except OSError as e:
                logger.error("Couldn't unlink temporary file: %s %s",
                             response['data']['temp_file_path'], e)

            # import data files
            if dataset_uuid:
                try:
                    dataset = DataSet.objects.get(uuid=dataset_uuid)
                except (DataSet.DoesNotExist, DataSet.MultipleObjectsReturned):
                    logger.error(
                        "Cannot import data files for data set UUID '%s'",
                        dataset_uuid)
                else:
                    # start importing uploaded data files
                    for file_store_item in dataset.get_file_store_items():
                        if file_store_item.source.startswith(
                                (settings.REFINERY_DATA_IMPORT_DIR, 's3://')):
                            import_file.delay(file_store_item.uuid)

                if request.is_ajax():
                    return JsonResponse({
                        'success': 'Data set imported',
                        'data': {
                            'new_data_set_uuid': dataset_uuid
                        }
                    })
                return HttpResponseRedirect(
                    reverse(self.success_view_name, args=[dataset_uuid]))
            else:
                error = 'Problem parsing ISA-Tab file'
                if request.is_ajax():
                    return JsonResponse({'error': error})
                return render(request, self.template_name,
                              {'form': form, 'error': error})
        else:  # submitted form is not valid
            return render(request, self.template_name, {'form': form})
Example #2
    def run(self):
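        """Parse the metadata file into investigation, study, assay, and
        node objects, register data and auxiliary files in the file store,
        and queue import tasks after parsing for files that are marked
        permanent or whose source is in the data import directory or on S3.

        Returns the created investigation.
        """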
        # create investigation, study and assay objects
        investigation = self._create_investigation()
        # FIXME: self.metadata_file.name may not be informative, especially in
        # case of temp files that don't exist on disk
        study = self._create_study(investigation=investigation,
                                   file_name=self.metadata_file.name)
        assay = self._create_assay(study=study,
                                   file_name=self.metadata_file.name)

        # import the metadata file as a "pre-isa" archive file
        logger.info("trying to add pre-isa archive file %s",
                    self.metadata_file.name)
        # FIXME: this will not create a FileStoreItem if self.metadata_file
        # does not exist on disk (e.g., a file object like TemporaryFile)
        investigation.pre_isarchive_file = create(self.metadata_file.name)
        import_file(investigation.pre_isarchive_file, refresh=True)
        investigation.save()

        # TODO: test if there are fewer columns than required
        logger.debug(
            "Parsing with file column %s and "
            "auxiliary file column %s", self.file_column_index,
            self.auxiliary_file_column_index)
        # UUIDs of data files to postpone importing until parsing is finished
        data_files = []
        # iterate over non-header rows in file
        for row in self.metadata_reader:
            # TODO: resolve relative indices
            internal_source_column_index = self.source_column_index
            internal_sample_column_index = self.sample_column_index
            internal_assay_column_index = self.assay_column_index
            # add data file to file store
            data_file_path = self.file_source_translator(
                row[self.file_column_index])
            data_file_uuid = create(source=data_file_path)
            data_files.append(data_file_uuid)
            # add auxiliary file to file store
            if self.auxiliary_file_column_index:
                auxiliary_file_path = self.file_source_translator(
                    row[self.auxiliary_file_column_index])
                auxiliary_file_uuid = create(source=auxiliary_file_path)
                data_files.append(auxiliary_file_uuid)
            else:
                auxiliary_file_uuid = None
            # add files to file server
            # TODO: add error handling in case of None values for UUIDs
            file_server.models.add(data_file_uuid, auxiliary_file_uuid)
            # create nodes if file was successfully created
            # source node
            source_name = self._create_name(row, internal_source_column_index,
                                            self.file_column_index)
            source_node, is_source_new = Node.objects.get_or_create(
                study=study, name=source_name, type=Node.SOURCE)
            # sample node
            sample_name = self._create_name(row, internal_sample_column_index,
                                            self.file_column_index)
            sample_node, is_sample_new = Node.objects.get_or_create(
                study=study, name=sample_name, type=Node.SAMPLE)
            source_node.add_child(sample_node)
            # assay node
            assay_name = self._create_name(row, internal_assay_column_index,
                                           self.file_column_index)
            assay_node, is_assay_new = Node.objects.get_or_create(
                study=study, assay=assay, name=assay_name, type=Node.ASSAY)
            sample_node.add_child(assay_node)
            file_node = Node.objects.create(
                study=study,
                assay=assay,
                name=row[self.file_column_index].strip(),
                file_uuid=data_file_uuid,
                type=Node.RAW_DATA_FILE,
                species=self._get_species(row),
                genome_build=self._get_genome_build(row),
                is_annotation=self._is_annotation(row))
            assay_node.add_child(file_node)
            # iterate over columns to create attributes to attach to sample
            # node
            for column_index in range(0, len(row)):
                # skip data file column
                if (self.file_column_index == column_index
                        or self.auxiliary_file_column_index == column_index
                        or self.annotation_column_index == column_index):
                    continue
                # create attribute as characteristic and attach to sample node
                # if the sample node was newly created
                if is_sample_new:
                    Attribute.objects.create(
                        node=sample_node,
                        type=Attribute.CHARACTERISTICS,
                        subtype=self.headers[column_index].strip().lower(),
                        value=row[column_index].strip())

        # Start remote file import tasks if the `Make Import Permanent:` flag
        # was set by the user, or if the file source begins with the
        # REFINERY_DATA_IMPORT_DIR setting (the case when users upload data
        # files along with their metadata)

        for uuid in data_files:
            try:
                file_store_item = FileStoreItem.objects.get(uuid=uuid)
            except (FileStoreItem.DoesNotExist,
                    FileStoreItem.MultipleObjectsReturned) as e:
                logger.error("Couldn't properly fetch FileStoreItem %s", e)
            else:
                if (self.file_permanent or file_store_item.source.startswith(
                        (settings.REFINERY_DATA_IMPORT_DIR, 's3://'))):
                    import_file.delay(uuid)

        return investigation
Example #3
    def run(self):
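        """Variant of the parser run that registers file store items with
        the `permanent` flag and queues import tasks for all registered
        data files once parsing has finished.

        Returns the created investigation.
        """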
        # create investigation, study and assay objects
        investigation = self._create_investigation()
        # FIXME: self.metadata_file.name may not be informative, especially in
        # case of temp files that don't exist on disk
        study = self._create_study(investigation=investigation,
                                   file_name=self.metadata_file.name)
        assay = self._create_assay(study=study,
                                   file_name=self.metadata_file.name)

        # import the metadata file as a "pre-isa" archive file
        logger.info("trying to add pre-isa archive file %s",
                    self.metadata_file.name)
        # FIXME: this will not create a FileStoreItem if self.metadata_file
        # does not exist on disk (e.g., a file object like TemporaryFile)
        investigation.pre_isarchive_file = create(
            self.metadata_file.name, permanent=True)
        import_file(investigation.pre_isarchive_file, refresh=True)
        investigation.save()

        # TODO: test if there are fewer columns than required
        logger.debug("Parsing with file column %s and "
                     "auxiliary file column %s",
                     self.file_column_index, self.auxiliary_file_column_index)
        # UUIDs of data files to postpone importing until parsing is finished
        data_files = []
        # iterate over non-header rows in file
        for row in self.metadata_reader:
            # TODO: resolve relative indices
            internal_source_column_index = self.source_column_index
            internal_sample_column_index = self.sample_column_index
            internal_assay_column_index = self.assay_column_index
            # add data file to file store
            data_file_path = self.file_source_translator(
                row[self.file_column_index])
            data_file_uuid = create(
                source=data_file_path, permanent=self.file_permanent)
            data_files.append(data_file_uuid)
            # add auxiliary file to file store
            if self.auxiliary_file_column_index:
                auxiliary_file_path = self.file_source_translator(
                    row[self.auxiliary_file_column_index])
                auxiliary_file_uuid = create(
                    source=auxiliary_file_path, permanent=self.file_permanent)
                data_files.append(auxiliary_file_uuid)
            else:
                auxiliary_file_uuid = None
            # add files to file server
            # TODO: add error handling in case of None values for UUIDs
            file_server.models.add(data_file_uuid, auxiliary_file_uuid)
            # create nodes if file was successfully created
            # source node
            source_name = self._create_name(
                row, internal_source_column_index, self.file_column_index)
            source_node, is_source_new = Node.objects.get_or_create(
                study=study, name=source_name, type=Node.SOURCE)
            # sample node
            sample_name = self._create_name(
                row, internal_sample_column_index, self.file_column_index)
            sample_node, is_sample_new = Node.objects.get_or_create(
                study=study, name=sample_name, type=Node.SAMPLE)
            source_node.add_child(sample_node)
            # assay node
            assay_name = self._create_name(
                row, internal_assay_column_index, self.file_column_index)
            assay_node, is_assay_new = Node.objects.get_or_create(
                study=study, assay=assay, name=assay_name, type=Node.ASSAY)
            sample_node.add_child(assay_node)
            file_node = Node.objects.create(
                study=study, assay=assay,
                name=row[self.file_column_index].strip(),
                file_uuid=data_file_uuid, type=Node.RAW_DATA_FILE,
                species=self._get_species(row),
                genome_build=self._get_genome_build(row),
                is_annotation=self._is_annotation(row))
            assay_node.add_child(file_node)
            # iterate over columns to create attributes to attach to sample
            # node
            for column_index in range(0, len(row)):
                # skip data file column
                if (self.file_column_index == column_index or
                        self.auxiliary_file_column_index == column_index or
                        self.annotation_column_index == column_index):
                    continue
                # create attribute as characteristic and attach to sample node
                # if the sample node was newly created
                if is_sample_new:
                    Attribute.objects.create(
                        node=sample_node, type=Attribute.CHARACTERISTICS,
                        subtype=self.headers[column_index].strip().lower(),
                        value=row[column_index].strip()
                    )
        # kick off data file importing tasks
        for uuid in data_files:
            import_file.delay(uuid)
        return investigation