Exemple #1
0
    def _get_deposit_result(self, response):
        """
        Build a DepositResult from the XML document returned by SWORD.

        The splash url is read from the ``href`` of the first
        ``atom:link[@rel='alternate']`` child of the root element and the
        identifier is the last path segment of that url. Both are ``None``
        when the link or its href is missing; a warning is logged then.
        The status is always ``pending``. No pdf_url is set because we expect
        moderation, so a pdf_url would be a dead link (for some time).

        :param response: XML body of the repository response, as a string
        :returns: DepositResult with identifier, splash_url and status set
        :raises DepositError: if the response is not well-formed XML
        """
        try:
            sword_statement = etree.fromstring(bytes(response, encoding='utf-8'))
        except etree.XMLSyntaxError:
            self.log('Invalid XML response from {}'.format(self.repository.name))
            raise DepositError(_('The repository {} returned invalid XML').format(self.repository.name))

        # We assume one element for the splash url, so we take the first one
        link_alternate = sword_statement.find("atom:link[@rel='alternate']", namespaces=NSMAP)
        if link_alternate is not None:
            splash_url = link_alternate.get('href', None)
        else:
            splash_url = None

        # An empty href is treated the same way as a missing one
        if splash_url:
            identifier = splash_url.split('/')[-1]
        else:
            identifier = None
            msg = "Found no splash url in XML response from repository {}. Either no link[@rel='alternate'] was present or the href was missing.".format(self.repository.name)
            self.log(msg)
            logger.warning(msg)

        # We expect that SWORD Repos usually have moderation. If this is at some point not the case, we can make this more flexible
        status = 'pending'

        deposit_result = DepositResult(identifier=identifier, splash_url=splash_url, status=status)

        return deposit_result
Exemple #2
0
    def submit_deposit(self, pdf, form, dry_run=False):
        """
        Create an OSF node, upload the PDF to it and create a preprint.

        :param pdf: path of the PDF file to upload
        :param form: bound form; ``cleaned_data['license']`` is read
        :param dry_run: accepted for interface compatibility, not used here
        :returns: the DepositResult created at the start; no field is set on it
        :raises DepositError: if the repository has no API key
        """
        if self.repository.api_key is None:
            raise DepositError(__("No OSF token provided."))

        api_key = self.repository.api_key
        license_id = form.cleaned_data['license']

        deposit_result = DepositResult()

        # Creating the metadata
        self.log("### Creating the metadata")
        min_node_structure, authors, paper_doi, pub_date = (
            self.createMetadata(form))
        self.log(json.dumps(min_node_structure, indent=4) + '')
        self.log(json.dumps(authors, indent=4) + '')

        # Creating a new depository
        self.log("### Creating a new depository")
        headers = {
            'Authorization': 'Bearer %s' % api_key,
            'Content-Type': 'application/vnd.api+json'
        }

        osf_response = self.create_node()
        node_id = osf_response['data']['id']

        self.osf_storage_data = get_newnode_osf_storage(node_id)
        osf_links = self.osf_storage_data['data']
        # NOTE(review): the upload link is recovered by stringifying a list
        # and stripping the Python 2 repr decoration ("[u'" and "']"). This
        # presumably relies on exactly one unicode link being produced --
        # confirm before porting to Python 3.
        osf_upload_link = str(
            list({translate_links(entry)
                  for entry in osf_links}))
        osf_upload_link = osf_upload_link.replace("[u'", '').replace("']", '')

        # Uploading the PDF
        self.log("### Uploading the PDF")
        upload_url_suffix = "?kind=file&name=article.pdf"
        upload_url = osf_upload_link + upload_url_suffix
        # Binary mode keeps the PDF bytes intact and the context manager
        # closes the handle even if the request raises.
        with open(pdf, 'rb') as data:
            primary_file_data = requests.put(upload_url,
                                             data=data,
                                             headers=headers)
        self.log_request(primary_file_data, 201,
                         __('Unable to upload the PDF file.'))
        primary_file_data = primary_file_data.json()
        # Path of the uploaded file without its leading character;
        # not consumed yet by the calls below.
        pf_path = primary_file_data['data']['attributes']['path'][1:]

        add_contributors()

        create_license()

        # Create Preprint
        osf_preprint_response = create_preprint()
        preprint_id = osf_preprint_response['data']['id']

        update_preprint_license()

        return (deposit_result)
Exemple #3
0
 def test_init_invalid_status(self):
     """
     DepositResult must reject a status that is not a known choice.
     """
     key = 'spam'
     # Guard: the test is only meaningful while 'spam' is not a real choice
     known_statuses = [choice[0] for choice in DEPOSIT_STATUS_CHOICES]
     if known_statuses.count('spam') > 0:
         message = '{} must not be DEPOSIT_STATUS_CHOICES to have valid test'
         raise Exception(message.format(key))
     with pytest.raises(ValueError):
         DepositResult(status=key)
 def test_add_embargo_date_to_deposit_result(self, embargo):
     """
     The embargo date from the form must end up on the deposit result.

     When ``embargo`` is None the form carries no 'embargo' entry and the
     resulting embargo_date is expected to compare equal to None.
     """
     form = Form()
     # Skip validation: cleaned_data is populated by hand
     form.cleaned_data = {} if embargo is None else {'embargo': embargo}
     result = self.protocol._add_embargo_date_to_deposit_result(
         DepositResult(status='pending'), form)
     assert result.embargo_date == embargo
Exemple #5
0
 def test_add_license_to_deposit_result(self, license_chooser):
     """
     If a license is selected, add to deposit record, otherwise not

     ``license_chooser`` is either falsy (no license selected) or an object
     whose ``license`` attribute is the expected license.
     """
     # We just set the cleaned data directly
     f = Form()
     f.cleaned_data = dict()
     if license_chooser:
         f.cleaned_data['license'] = license_chooser
     dr = DepositResult(status='pending')
     dr = self.protocol._add_license_to_deposit_result(dr, f)
     if license_chooser:
         assert dr.license == license_chooser.license
     else:
         # Identity check: None is a singleton and must not go through __eq__
         assert dr.license is None
Exemple #6
0
 def test_init(self):
     """
     A fresh DepositResult exposes exactly as many instance attributes as
     listed below, and each listed name is accessible on it.
     """
     result = DepositResult(status='pending')
     expected_names = (
         'identifier',
         'splash_url',
         'pdf_url',
         'logs',
         'status',
         'message',
         'license',
         'oairecord',
         'additional_info',
     )
     assert len(expected_names) == len(result.__dict__)
     missing = [name for name in expected_names if not hasattr(result, name)]
     assert not missing
Exemple #7
0
    def test_init(self):
        """
        The instance attributes of a fresh DepositResult must be exactly the
        names listed below.
        """
        expected_names = {
            'identifier',
            'splash_url',
            'pdf_url',
            'logs',
            'status',
            'message',
            'license',
            'oairecord',
            'embargo_date',
            'additional_info',
        }
        result = DepositResult(status='pending')

        assert expected_names == set(result.__dict__)
Exemple #8
0
    def submit_deposit(self, pdf, form, dry_run=False):
        """
        Send the Dublin Core metadata of ``self.paper`` to the repository.

        The metadata is rendered with DCFormatter and POSTed as the ``files``
        payload (named ``metadata.xml``) to the URL stored in
        ``self.repository.api_key``, with HTTP basic authentication.
        ``pdf``, ``form`` and ``dry_run`` are not used by this method.

        :returns: an empty DepositResult
        :raises DepositError: on a requests or sword2 HTTP error
        """
        conn = None
        try:
            self.log("### Connecting")
            conn = self.get_conn()

            self.log("### Creating metadata")
            dc_formatter = DCFormatter()
            metadata_xml = dc_formatter.toString(
                self.paper, 'article.pdf', True, xml_declaration=False)
            self.log(metadata_xml)

            self.log("### Submitting metadata")
            upload = {'file': ('metadata.xml', metadata_xml)}
            request_headers = {
                'In-Progress': 'false',
                'Content-Type': 'application/atom+xml; type=entry'
            }
            credentials = requests.auth.HTTPBasicAuth(
                self.repository.username, self.repository.password)
            reply = requests.post(
                self.repository.api_key,
                files=upload,
                headers=request_headers,
                auth=credentials)
            self.log_request(
                reply, 201,
                __('Unable to submit the paper to the collection.'))

            self.log(unicode(reply.text))

            return DepositResult()
        except requests.exceptions.RequestException as e:
            raise DepositError(unicode(e))
        except sword2.exceptions.HTTPResponseError:
            # Keep the connection history in the log when we got that far
            if conn is not None:
                self.log(unicode(conn.history))
            raise DepositError(__('Failed to connect to the SWORD server.'))
Exemple #9
0
    def test_submit_deposit_wrapper(self, splash_url, expected_splash_url, on_todolist, book_god_of_the_labyrinth, depositing_user, monkeypatch):
        """
        We monkeypatch the submit_deposit to return a DepositResult.

        The wrapper must hand back a DepositResult whose ``oairecord`` is an
        instance of ``expected_splash_url`` (which is used as a type here),
        and the paper must no longer be on the user's todolist afterwards.
        """
        self.protocol.paper = book_god_of_the_labyrinth
        self.protocol.user = depositing_user

        if on_todolist:
            book_god_of_the_labyrinth.todolist.add(self.protocol.user)

        dr = DepositResult(splash_url=splash_url)
        monkeypatch.setattr(self.protocol, 'submit_deposit', lambda *args, **kwargs: dr)

        deposit_result = self.protocol.submit_deposit_wrapper()

        assert isinstance(deposit_result, DepositResult)
        assert isinstance(deposit_result.oairecord, expected_splash_url)
        assert not book_god_of_the_labyrinth.todolist.filter(pk=self.protocol.user.pk).exists()
Exemple #10
0
    def _get_deposit_result(self, response):
        """
        Turn the SWORD statement returned by the repository into a
        DepositResult.

        The splash url is the ``href`` of the first ``sword:originalDeposit``
        element found anywhere in the document; the identifier is the last
        path segment of that url. Both are ``None`` when the element or its
        href is absent, in which case a warning is logged. The status is
        always ``pending`` and no pdf_url is set, because moderation is
        expected and the link would be dead for some time.

        Raises DepositError if the response is not well-formed XML.
        """
        try:
            statement = etree.fromstring(bytes(response, encoding='utf-8'))
        except etree.XMLSyntaxError:
            log_text = 'Invalid XML response from {}'
            self.log(log_text.format(self.repository.name))
            user_text = _('The repository {} returned invalid XML')
            raise DepositError(user_text.format(self.repository.name))

        deposit_node = statement.find(
            './/sword:originalDeposit', namespaces=statement.nsmap)
        splash_url = None if deposit_node is None else deposit_node.get('href')

        identifier = None
        if splash_url is None:
            msg = 'Found no splash url in XML reposonse from repository {}. Either no originalDeposit was present or the href was missing.'.format(
                self.repository.name)
            self.log(msg)
            logger.warning(msg)
        else:
            identifier = splash_url.split('/')[-1]

        # SWORD repositories usually moderate deposits, hence always 'pending'
        return DepositResult(
            identifier=identifier, splash_url=splash_url, status='pending')
Exemple #11
0
    def submit_deposit(self, pdf, form, dry_run=False):
        """
        Deposit the paper on Zenodo.

        Steps: check the access token, create a deposition, upload the PDF as
        ``article.pdf``, submit the metadata, then either publish the
        deposition or, for a dry run, delete it again.

        :param pdf: path of the PDF file to upload
        :param form: bound form handed to ``self.createMetadata``
        :param dry_run: if true, the deposition is deleted instead of
            published and the result status is ``faked``
        :returns: DepositResult with identifier, splash_url and pdf_url set
        :raises DepositError: if no API key is configured or the metadata has
            no abstract. ``self.log_request`` presumably raises it too when a
            response has an unexpected status code -- verify against its
            definition.
        """
        if self.repository.api_key is None:
            raise DepositError(_("No Zenodo API key provided."))
        api_key = self.repository.api_key
        api_url_with_key = self.api_url+'?access_token='+api_key

        deposit_result = DepositResult()

        # Checking the access token
        self.log("### Checking the access token")
        r = requests.get(api_url_with_key)
        hiccups_message = ' ' + _(
            'This happens when Zenodo has hiccups. '
            'Please try again in a few minutes or use a different '
            'repository in the menu above.'
        )
        self.log_request(
            r, 200,
            _('Unable to authenticate to Zenodo.') + hiccups_message
        )

        # Creating a new deposition
        self.log("### Creating a new deposition")
        headers = {"Content-Type": "application/json"}
        r = requests.post(api_url_with_key, data=str("{}"), headers=headers)
        self.log_request(r, 201, _(
            'Unable to create a new deposition on Zenodo.')+hiccups_message)
        deposition_id = r.json()['id']
        deposit_result.identifier = deposition_id
        self.log("Deposition id: %d" % deposition_id)

        # Uploading the PDF
        self.log("### Uploading the PDF")
        data = {'name': 'article.pdf'}
        # The context manager closes the file handle once the upload is done,
        # also when the request raises.
        with open(pdf, 'rb') as pdf_file:
            r = requests.post(
                (
                    self.api_url + "/%s/files?access_token=%s" %
                    (deposition_id, api_key)
                ),
                data=data, files={'file': pdf_file}
            )
        self.log_request(r, 201, _(
            'Unable to transfer the document to Zenodo.')+hiccups_message)

        # Creating the metadata
        self.log("### Generating the metadata")
        data = self.createMetadata(form)
        self.log(json.dumps(data, indent=4)+'')

        # Check that there is an abstract
        if data['metadata'].get('description', '') == '':
            self.log('No abstract found, aborting.')
            raise DepositError(_(
                'No abstract is available for this paper but Zenodo '
                'requires one. Please provide it using the metadata panel.'
            ))

        # Submitting the metadata
        self.log("### Submitting the metadata")
        r = requests.put(
            self.api_url + "/%s?access_token=%s" % (deposition_id, api_key),
            data=json.dumps(data),
            headers=headers
        )
        self.log_request(r, 200, _(
            'Unable to submit paper metadata to Zenodo.'))

        if dry_run:
            # Deleting the deposition
            self.log("### Deleting the deposition")
            r = requests.delete(self.api_url+"/%s?access_token=%s" %
                                (deposition_id, api_key))
            self.log(r.text)
            deposit_result.status = 'faked'
            deposit_result.splash_url = 'http://sandbox.zenodo.org/fake'
            deposit_result.pdf_url = deposit_result.splash_url
        else:
            self.log("### Publishing the deposition")
            r = requests.post(
                self.api_url + "/%s/actions/publish?access_token=%s" %
                (deposition_id, api_key)
            )
            self.log_request(r, 202, _(
                'Unable to publish the deposition on Zenodo.'))
            self.log(r.text)

            deposition_object = r.json()
            links = deposition_object.get('links', {})
            # Falls back to the Zenodo home page if no record link is returned
            deposit_result.splash_url = links.get(
                'record_html', 'https://zenodo.org/'
            )
            deposit_result.pdf_url = (
                deposit_result.splash_url + '/files/article.pdf'
            )

        return deposit_result
Exemple #12
0
    def submit_deposit(self, pdf, form, dry_run=False):
        """
        Deposit the paper on Zenodo.

        Steps: check the access token, create a deposition, upload the PDF as
        ``article.pdf``, submit the metadata, then either publish the
        deposition or, for a dry run, delete it again.

        :param pdf: path of the PDF file to upload
        :param form: bound form handed to ``self.createMetadata``
        :param dry_run: if true, the deposition is deleted instead of
            published and the result status is ``faked``
        :returns: DepositResult with identifier, splash_url and pdf_url set
        :raises DepositError: if no API key is configured or the metadata has
            no abstract. ``self.log_request`` presumably raises it too when a
            response has an unexpected status code -- verify against its
            definition.
        """
        if self.repository.api_key is None:
            raise DepositError(__("No Zenodo API key provided."))
        api_key = self.repository.api_key
        api_url_with_key = self.api_url + '?access_token=' + api_key

        deposit_result = DepositResult()

        # Checking the access token
        self.log("### Checking the access token")
        r = requests.get(api_url_with_key)
        self.log_request(r, 200, __('Unable to authenticate to Zenodo.'))

        # Creating a new deposition
        self.log("### Creating a new deposition")
        headers = {"Content-Type": "application/json"}
        r = requests.post(api_url_with_key, data=str("{}"), headers=headers)
        self.log_request(r, 201,
                         __('Unable to create a new deposition on Zenodo.'))
        deposition_id = r.json()['id']
        deposit_result.identifier = deposition_id
        self.log("Deposition id: %d" % deposition_id)

        # Uploading the PDF
        self.log("### Uploading the PDF")
        data = {'name': 'article.pdf'}
        # The context manager closes the file handle once the upload is done,
        # also when the request raises.
        with open(pdf, 'rb') as pdf_file:
            r = requests.post(self.api_url + "/%s/files?access_token=%s" %
                              (deposition_id, api_key),
                              data=data,
                              files={'file': pdf_file})
        self.log_request(r, 201,
                         __('Unable to transfer the document to Zenodo.'))

        # Creating the metadata
        self.log("### Generating the metadata")
        data = self.createMetadata(form)
        self.log(json.dumps(data, indent=4) + '')

        # Check that there is an abstract
        if data['metadata'].get('description', '') == '':
            self.log('No abstract found, aborting.')
            raise DepositError(
                __('No abstract is available for this paper but ' +
                   'Zenodo requires to attach one. Please use the metadata panel to provide one.'
                   ))

        # Submitting the metadata
        self.log("### Submitting the metadata")
        r = requests.put(self.api_url + "/%s?access_token=%s" %
                         (deposition_id, api_key),
                         data=json.dumps(data),
                         headers=headers)
        self.log_request(r, 200,
                         __('Unable to submit paper metadata to Zenodo.'))

        if dry_run:
            # Deleting the deposition
            self.log("### Deleting the deposition")
            r = requests.delete(self.api_url + "/%s?access_token=%s" %
                                (deposition_id, api_key))
            self.log(r.text)
            deposit_result.status = 'faked'
            deposit_result.splash_url = 'http://sandbox.zenodo.org/fake'
            deposit_result.pdf_url = deposit_result.splash_url
        else:
            self.log("### Publishing the deposition")
            r = requests.post(self.api_url +
                              "/%s/actions/publish?access_token=%s" %
                              (deposition_id, api_key))
            self.log_request(r, 202,
                             __('Unable to publish the deposition on Zenodo.'))
            self.log(r.text)

            deposition_object = r.json()
            links = deposition_object.get('links', {})
            # Falls back to the Zenodo home page if no record link is returned
            deposit_result.splash_url = links.get('record_html',
                                                  'https://zenodo.org/')
            deposit_result.pdf_url = deposit_result.splash_url + '/files/article.pdf'

        return deposit_result
Exemple #13
0
    def submit_deposit(self, pdf, form, dry_run=False):
        """
        Deposit the paper on OSF as a project node with an attached preprint.

        Steps: create the node, upload the PDF as ``article.pdf`` to its
        storage, add contributors and license, create the preprint, then
        either make project and preprint public or, for a dry run, delete the
        project again.

        :param pdf: path of the PDF file to upload
        :param form: bound form; ``cleaned_data['license'].transmit_id`` is
            read, and the form is passed on to the metadata helpers
        :param dry_run: if true, the project is deleted instead of published
        :returns: DepositResult whose identifier is the public project url,
            splash_url the public preprint url and pdf_url its download link
        :raises DepositError: if no endpoint or no API key is configured.
            ``self.log_request`` presumably raises it too when a response has
            an unexpected status code -- verify against its definition.
        """
        if not self.api_url:
            raise DepositError(_("No Repository endpoint provided."))

        if self.repository.api_key is None:
            raise DepositError(_("No OSF token provided."))

        api_key = self.repository.api_key
        self.license_id = form.cleaned_data['license'].transmit_id
        self.user_id_on_osf = self.get_preferences(self.user).on_behalf_of

        paper, abstract = self.get_primary_data(form)
        authors = paper['authors']
        records = paper['records']
        # Drops the last six characters of the date string
        self.pub_date = paper['date'][:-6]
        tags = self.create_tags(form)
        subjects = self.create_subjects(form)

        deposit_result = DepositResult()

        # To connect to the API.
        self.headers = {
            'Authorization': 'Bearer %s' % api_key,
            'Content-Type': 'application/vnd.api+json'
        }
        self.user_id = self.repository.username

        # Creating the metadata.
        # NOTE(review): self.node_id, self.node_url and
        # self.preprint_node_url are read below but not assigned in this
        # method; presumably create_node / create_preprint set them.
        self.create_node(abstract, tags, authors)

        self.log("### Creating a new deposition")
        osf_storage_data = self.get_newnode_osf_storage(self.node_id)
        osf_links = osf_storage_data['data']
        # NOTE(review): the upload link is recovered by stringifying a list
        # and stripping the Python 2 repr decoration ("[u'" and "']"). This
        # presumably relies on exactly one unicode link being produced --
        # confirm before relying on it under Python 3.
        osf_upload_link = str(
            list({self.translate_links(entry)
                  for entry in osf_links}))
        osf_upload_link = osf_upload_link.replace("[u'", '').replace("']", '')

        self.log("### Uploading the PDF")
        upload_url_suffix = "?kind=file&name=article.pdf"
        upload_url = osf_upload_link + upload_url_suffix
        # Binary mode keeps the PDF bytes intact and the context manager
        # closes the handle even if the request raises.
        with open(pdf, 'rb') as data:
            primary_file_data = requests.put(upload_url,
                                             data=data,
                                             headers=self.headers)
        self.log_request(primary_file_data, 201,
                         _('Unable to upload the PDF file.'))
        primary_file_data = primary_file_data.json()

        # Path of the uploaded file without its leading character
        pf_path = primary_file_data['data']['attributes']['path'][1:]

        self.add_contributors(authors)

        self.create_license(authors)

        # Create the Preprint.
        osf_preprint_response = (self.create_preprint(pf_path, records,
                                                      subjects))
        preprint_id = osf_preprint_response['data']['id']

        if self.api_url == "https://test-api.osf.io/":
            self.preprint_public_url = "https://test.osf.io/" + preprint_id
        else:
            self.preprint_public_url = "https://osf.io/" + preprint_id

        preprint_public_pdf = self.preprint_public_url + "/download"

        self.update_preprint_license(authors, preprint_id)
        self.mask_dissemin_contributor()

        if self.api_url == "https://test-api.osf.io/":
            self.project_public_url = "https://test.osf.io/" + self.node_id
        else:
            self.project_public_url = "https://osf.io/" + self.node_id

        self.log("### FINAL DEBUG")
        self.log(self.project_public_url)
        self.log(self.preprint_public_url)
        self.log(preprint_public_pdf)

        if dry_run:
            self.log("### Deleting the deposition")
            deletion_req = requests.delete(self.node_url, headers=self.headers)
            self.log_request(deletion_req, 204,
                             _('Unable to delete the project.'))
            self.log(str(deletion_req.status_code))
            self.log(deletion_req.text)
        else:
            self.log("### Publishing the deposition")
            public_project = {
                "data": {
                    "type": "nodes",
                    "id": self.node_id,
                    "attributes": {
                        "public": "true"
                    }
                }
            }

            public_preprint = {
                "type": "preprints",
                "data": {
                    "id": preprint_id,
                    "attributes": {
                        "is_published": "true"
                    }
                }
            }
            self.log("### Make the project public")
            project_pub_req = requests.patch(self.node_url,
                                             data=json.dumps(public_project),
                                             headers=self.headers)

            self.log_request(project_pub_req, 200,
                             _('Unable to make the project public.'))

            self.log("### Make the preprint public")
            preprint_pub_req = requests.patch(self.preprint_node_url,
                                              data=json.dumps(public_preprint),
                                              headers=self.headers)

            # NOTE(review): this message mentions the project although the
            # request concerns the preprint; kept as is so the existing
            # translation key still matches.
            self.log_request(preprint_pub_req, 200,
                             _('Unable to make the project public.'))

        deposit_result.identifier = self.project_public_url
        deposit_result.splash_url = self.preprint_public_url
        deposit_result.pdf_url = preprint_public_pdf

        return deposit_result
Exemple #14
0
    def submit_deposit(self, pdf, form, dry_run=False):
        """
        Deposit the paper in HAL through its SWORD endpoint.

        The metadata and the PDF are bundled in a ZIP archive which is POSTed
        to ``<api_url path>hal``. On success the deposit receipt provides the
        deposition id and a password, and the ``location`` response header
        provides the splash url. For a dry run the deposit is deleted again.

        :param pdf: PDF handed to ``self.create_zip``
        :param form: bound form handed to ``self.create_metadata``
        :param dry_run: if true, a DELETE request is sent for the new deposit
            and the result status is ``faked`` instead of ``pending``
        :returns: DepositResult with identifier, splash_url, status and
            additional_info (the password) set; pdf_url is None
        :raises DepositError: if credentials are missing, HAL refuses or
            rejects the deposit, its response is not valid XML, or any other
            exception occurs (reported as a connection failure)
        """
        if self.username is None or self.password is None:
            raise DepositError(_("No HAL user credentials provided."))

        deposit_result = DepositResult()

        try:
            # Creating the metadata
            self.log("### Generating metadata")
            metadata = self.create_metadata(form)

            # Bundling the metadata and the PDF
            self.log("### Creating ZIP file")
            zipFile = self.create_zip(pdf, metadata)

            # Build the list of users who should own this deposit
            on_behalf_of = [self.username]
            if self.hal_preferences.on_behalf_of:
                on_behalf_of.append(self.hal_preferences.on_behalf_of)

            # Creating a new deposition
            self.log("### Creating a new deposition")

            parsed_endpoint = urlparse(self.api_url)
            host = parsed_endpoint.netloc
            path = parsed_endpoint.path + 'hal'

            # NOTE(review): a plain HTTP connection is used whatever the
            # scheme of api_url is -- confirm this is intended.
            conn = http_client.HTTPConnection(host)
            # skip_host and skip_accept_encoding: those headers are set below
            conn.putrequest('POST', path, True, True)
            zipContent = zipFile.getvalue()
            headers = {
                'Authorization': self.encodeUserData(),
                'Host': host,
                'X-Packaging': 'http://purl.org/net/sword-types/AOfr',
                'Content-Type': 'application/zip',
                'Content-Disposition': 'attachment; filename=meta.xml',
                'Content-Length': len(zipContent),
                'On-Behalf-Of': ';'.join(on_behalf_of),
                }
            for header, value in headers.items():
                conn.putheader(header, value)
            conn.endheaders()
            conn.send(zipContent)
            resp = conn.getresponse()

            xml_response = resp.read()
            conn.close()
            try:
                parser = etree.XMLParser(encoding='utf-8')
                receipt = etree.parse(BytesIO(xml_response), parser)
                if resp.status != 201:
                    self.log('Deposit response status: HTTP %d' % resp.status)
                    self.log(xml_response.decode('utf-8'))
                    self.log('Metadata payload was:')
                    self.log(metadata.decode('utf-8'))
                    # Get the verbose description of the error to output it as well
                    root = receipt.getroot()
                    verboseDescription = (
                        next(
                            root.iter(
                                "{http://purl.org/net/sword/error/}verboseDescription"
                            )
                        ).text
                    )
                    # Give a better error message to the user if the document
                    # already exists in HAL. See #356.
                    # This is an explicit check rather than an assert: asserts
                    # are stripped under -O, which would report every refusal
                    # as a duplicate.
                    try:
                        is_duplicate = (
                            "duplicate-entry" in json.loads(verboseDescription)
                        )
                    except (TypeError, ValueError):
                        # Description is missing, not JSON, or not a container
                        is_duplicate = False
                    if is_duplicate:
                        raise DepositError(
                            _(
                                'This document is already in HAL. '
                                'HAL refused the deposit.'
                            )
                        )
                    raise DepositError(
                        _(
                            'HAL refused the deposit (HTTP error %d): %s') %
                            (resp.status, verboseDescription)
                        )
            except etree.XMLSyntaxError:
                self.log('Invalid XML response from HAL:')
                self.log(xml_response.decode('utf-8'))
                self.log('(end of the response)')
                raise DepositError(_('HAL returned an invalid XML response'))

            receipt = receipt.getroot()
            if receipt.tag == '{http://purl.org/net/sword/error/}error':
                self.log('Error while depositing the content.')
                verbosedesc = receipt.find(
                    '{http://purl.org/net/sword/error/}verboseDescription')

                # this will happen if a paper has not made its way via
                # OAI to us, so we could not detect that earlier in the
                # submission
                if verbosedesc is not None and 'duplicate-entry' in verbosedesc.text:
                    raise DepositError(_('This paper already exists in HAL.'))

                # Otherwise this error should not happen: let's dump
                # everything to check later
                self.log('Here is the XML response:{}'.format(xml_response.decode('utf-8')))
                self.log('Here is the metadata:{}'.format(metadata.decode('utf-8')))
                raise DepositError(_('HAL rejected the submission.'))
            else:
                self.log(xml_response.decode('utf-8'))

            deposition_id = receipt.find('{http://www.w3.org/2005/Atom}id').text
            password = receipt.find(
                '{http://hal.archives-ouvertes.fr/}password').text
            document_url = resp.getheader('location')

            if not deposition_id:
                raise DepositError(_('HAL rejected the submission'))

            self.log("Deposition id: %s" % deposition_id)

            deposit_result.identifier = deposition_id
            deposit_result.splash_url = document_url
            deposit_result.pdf_url = None
            deposit_result.status = 'pending' # HAL moderates submissions
            deposit_result.additional_info = [
                {'label':_('Password'),
                 'value':password},
            ]

            if dry_run:
                # Remove the deposit that was just created
                conn = http_client.HTTPConnection(host)
                conn.putrequest('DELETE', '/sword/'+deposition_id)
                headers = {
                    'Authorization': self.encodeUserData(),
                   # 'Host': host,
                    'Accept': '*/*',
                    'User-Agent': 'dissemin',
                }
                for header, value in headers.items():
                    conn.putheader(header, value)
                conn.endheaders()
                resp = conn.getresponse()
                self.log(resp.read())
                conn.close()
                deposit_result.status = 'faked'

        except DepositError:
            # Already user-facing: let it through untouched
            raise
        except Exception as e:
            # Anything else is logged in full and reported as a connection
            # failure
            self.log("Caught exception:")
            self.log(str(type(e))+': '+str(e)+'')
            self.log(traceback.format_exc())
            raise DepositError(_(
                'Connection to HAL failed. Please try again later.'))

        return deposit_result
Exemple #15
0
    def submit_deposit(self, pdf, form, dry_run=False):
        if self.username is None or self.password is None:
            raise DepositError(_("No HAL user credentials provided."))

        deposit_result = DepositResult()

        try:
            # Creating the metadata
            self.log("### Generating metadata")
            metadata = self.create_metadata(form)

            # Bundling the metadata and the PDF
            self.log("### Creating ZIP file")
            zipFile = self.create_zip(pdf, metadata)

            # Build the list of users who should own this deposit
            on_behalf_of = [self.username]
            if self.hal_preferences.on_behalf_of:
                on_behalf_of.append(self.hal_preferences.on_behalf_of)

            # Creating a new deposition
            self.log("### Creating a new deposition")

            parsed_endpoint = urlparse(self.api_url)
            host = parsed_endpoint.netloc
            path = parsed_endpoint.path + 'hal'

            if self.api_url.startswith('http://'):
                conn = http_client.HTTPConnection(host)
            else:
                conn = http_client.HTTPSConnection(host)
            conn.putrequest('POST', path, True, True)
            zipContent = zipFile.getvalue()
            headers = {
                'Authorization': self.encodeUserData(),
                'Host': host,
                'X-Packaging': 'http://purl.org/net/sword-types/AOfr',
                'Content-Type': 'application/zip',
                'Content-Disposition': 'attachment; filename=meta.xml',
                'Content-Length': len(zipContent),
                'On-Behalf-Of': ';'.join(on_behalf_of),
            }
            for header, value in list(headers.items()):
                conn.putheader(header, value)
            conn.endheaders()
            conn.send(zipContent)
            resp = conn.getresponse()

            xml_response = resp.read()
            conn.close()
            try:
                parser = etree.XMLParser(encoding='utf-8')
                receipt = etree.parse(BytesIO(xml_response), parser)
                if resp.status != 201:
                    self.log('Deposit response status: HTTP %d' % resp.status)
                    self.log(xml_response.decode('utf-8'))
                    self.log('Metadata payload was:')
                    self.log(metadata.decode('utf-8'))
                    # Get the verbose description of the error to output it as well
                    root = receipt.getroot()
                    verboseDescription = (next(
                        root.iter(
                            "{http://purl.org/net/sword/error/}verboseDescription"
                        )).text)
                    try:
                        # Give a better error message to the user if the document
                        # already exists in HAL. See #356.
                        assert "duplicate-entry" in json.loads(
                            verboseDescription)
                        raise DepositError(
                            _('This document is already in HAL. '
                              'HAL refused the deposit.'))
                    except (ValueError, AssertionError):
                        raise DepositError(
                            _('HAL refused the deposit (HTTP error %d): %s') %
                            (resp.status, verboseDescription))
            except etree.XMLSyntaxError:
                self.log('Invalid XML response from HAL:')
                self.log(xml_response.decode('utf-8'))
                self.log('(end of the response)')
                raise DepositError(_('HAL returned an invalid XML response'))

            receipt = receipt.getroot()
            if receipt.tag == '{http://purl.org/net/sword/error/}error':
                self.log('Error while depositing the content.')
                verbosedesc = receipt.find(
                    '{http://purl.org/net/sword/error/}verboseDescription')

                # this will happen if a paper has not made its way via
                # OAI to us, so we could not detect that earlier in the
                # submission
                if verbosedesc is not None and 'duplicate-entry' in verbosedesc.text:
                    raise DepositError(_('This paper already exists in HAL.'))

                # Otherwise this error should not happen: let's dump
                # everything to check later
                self.log('Here is the XML response:{}'.format(
                    xml_response.decode('utf-8')))
                self.log('Here is the metadata:{}'.format(
                    metadata.decode('utf-8')))
                raise DepositError(_('HAL rejected the submission.'))
            else:
                self.log(xml_response.decode('utf-8'))

            deposition_id = receipt.find(
                '{http://www.w3.org/2005/Atom}id').text
            password = receipt.find(
                '{http://hal.archives-ouvertes.fr/}password').text
            document_url = resp.getheader('location')

            if not deposition_id:
                raise DepositError(_('HAL rejected the submission'))

            self.log("Deposition id: %s" % deposition_id)

            deposit_result.identifier = deposition_id
            deposit_result.splash_url = document_url
            deposit_result.pdf_url = None
            deposit_result.status = 'pending'  # HAL moderates submissions
            deposit_result.additional_info = [
                {
                    'label': _('Password'),
                    'value': password
                },
            ]

            if dry_run:
                conn = http_client.HTTPConnection(host)
                conn.putrequest('DELETE', '/sword/' + deposition_id)
                headers = {
                    'Authorization': self.encodeUserData(),
                    # 'Host': host,
                    'Accept': '*/*',
                    'User-Agent': 'dissemin',
                }
                for header, value in list(headers.items()):
                    conn.putheader(header, value)
                conn.endheaders()
                resp = conn.getresponse()
                self.log(resp.read())
                conn.close()
                deposit_result.status = 'faked'

        except DepositError as e:
            raise e
        except Exception as e:
            self.log("Caught exception:")
            self.log(str(type(e)) + ': ' + str(e) + '')
            self.log(traceback.format_exc())
            raise DepositError(
                _('Connection to HAL failed. Please try again later.'))

        return deposit_result
Exemple #16
0
 def _get_deposit_result(*args, **kwargs):
     """
     Returns a new, empty DepositResult.
     All positional and keyword arguments are accepted and ignored.
     """
     return DepositResult()
Exemple #17
0
    def submit_deposit(self, pdf, form, dry_run=False):
        """
        Deposits a paper on OSF.
        We create a project (node), upload the PDF to its storage, add the
        contributors and the license and create the preprint. Afterwards the
        project and the preprint are made public, or, on a dry run, the
        project is deleted again.

        :param pdf: path of the PDF file to upload
        :param form: the deposit form; ``cleaned_data['license']`` is read
                     here and the form is passed on to the metadata helpers
        :param dry_run: if true, delete the project instead of publishing it
        :returns: a DepositResult with identifier (public project url),
                  splash_url (public preprint url) and pdf_url set
        :raises DepositError: if the endpoint or the API token is missing, or
                              if OSF lists no storage to upload to.
                              NOTE(review): ``log_request`` presumably raises
                              it as well on an unexpected HTTP status --
                              confirm against its definition.
        """
        if not self.api_url:
            raise DepositError(_("No Repository endpoint provided."))

        if self.repository.api_key is None:
            raise DepositError(_("No OSF token provided."))

        api_key = self.repository.api_key
        self.license_id = form.cleaned_data['license']
        self.user_id_on_osf = self.get_preferences(self.user).on_behalf_of

        paper, abstract = self.get_primary_data(form)
        authors = paper['authors']
        records = paper['records']
        # Strip the last six characters of the date string
        # (presumably '-MM-DD', leaving the year -- TODO confirm).
        self.pub_date = paper['date'][:-6]
        tags = self.create_tags(form)
        subjects = self.create_subjects(form)

        deposit_result = DepositResult()

        # To connect to the API.
        self.headers = {
            'Authorization': 'Bearer %s' % api_key,
            'Content-Type': 'application/vnd.api+json'
        }
        self.user_id = self.repository.username

        # Creating the metadata.
        self.create_node(abstract, tags, authors)

        self.log("### Creating a new deposition")
        osf_storage_data = self.get_newnode_osf_storage(self.node_id)
        # Read the upload link straight from the API data instead of
        # stringifying a list and stripping its repr, which only worked
        # on Python 2 and with exactly one link.
        upload_links = [
            self.translate_links(entry) for entry in osf_storage_data['data']
        ]
        if not upload_links:
            self.log("No storage upload link found for the new project.")
            raise DepositError(_('Unable to upload the PDF file.'))
        osf_upload_link = upload_links[0]

        self.log("### Uploading the PDF")
        upload_url = osf_upload_link + "?kind=file&name=article.pdf"
        # A PDF is binary data: open it in binary mode and make sure the
        # handle is closed once the upload is done.
        with open(pdf, 'rb') as data:
            primary_file_data = requests.put(upload_url,
                                             data=data,
                                             headers=self.headers)
        self.log_request(primary_file_data, 201,
                         _('Unable to upload the PDF file.'))
        primary_file_data = primary_file_data.json()

        # The path comes with a leading character that is dropped here
        pf_path = primary_file_data['data']['attributes']['path'][1:]

        self.add_contributors(authors)

        self.create_license(authors)

        # Create the Preprint.
        osf_preprint_response = (
            self.create_preprint(pf_path, records, subjects)
        )
        preprint_id = osf_preprint_response['data']['id']

        # The test API has its own public frontend
        if self.api_url == "https://test-api.osf.io/":
            public_base_url = "https://test.osf.io/"
        else:
            public_base_url = "https://osf.io/"

        self.preprint_public_url = public_base_url + preprint_id
        preprint_public_pdf = self.preprint_public_url + "/download"

        self.update_preprint_license(authors, preprint_id)
        self.mask_dissemin_contributor()

        self.project_public_url = public_base_url + self.node_id

        self.log("### FINAL DEBUG")
        self.log(self.project_public_url)
        self.log(self.preprint_public_url)
        self.log(preprint_public_pdf)

        if dry_run:
            self.log("### Deleting the deposition")
            deletion_req = requests.delete(self.node_url,
                                           headers=self.headers)
            self.log_request(deletion_req, 204,
                             _('Unable to delete the project.'))
            self.log(str(deletion_req.status_code))
            self.log(deletion_req.text)
        else:
            self.log("### Publishing the deposition")
            public_project = {
                "data": {
                    "type": "nodes",
                    "id": self.node_id,
                    "attributes": {
                        "public": "true"
                    }
                }
            }

            public_preprint = {
                "type": "preprints",
                "data": {
                    "id": preprint_id,
                    "attributes": {
                        "is_published": "true"
                    }
                }
            }
            self.log("### Make the project public")
            project_pub_req = requests.patch(self.node_url,
                                             data=json.dumps(public_project),
                                             headers=self.headers)

            self.log_request(project_pub_req, 200,
                             _('Unable to make the project public.'))

            self.log("### Make the preprint public")
            preprint_pub_req = requests.patch(self.preprint_node_url,
                                              data=json.dumps(public_preprint),
                                              headers=self.headers)

            # This step publishes the preprint, so the message names it
            self.log_request(preprint_pub_req, 200,
                             _('Unable to make the preprint public.'))

        deposit_result.identifier = self.project_public_url
        deposit_result.splash_url = self.preprint_public_url
        deposit_result.pdf_url = preprint_public_pdf

        return deposit_result
Exemple #18
0
    def submit_deposit(self, pdf, form, dry_run=False):
        """
        Creates a project on OSF, uploads the PDF to its storage and adds
        the authors of the paper as contributors.

        :param pdf: path of the PDF file to upload
        :param form: the deposit form, passed on to ``createMetadata``
        :param dry_run: accepted for interface compatibility, not used here
        :returns: the DepositResult created at the beginning; no field of it
                  is set by this method
        :raises DepositError: if no API token is configured or if OSF lists
                              no storage to upload to
        """
        if self.repository.api_key is None:
            raise DepositError(__("No OSF token provided."))

        api_key = self.repository.api_key

        deposit_result = DepositResult()

        # Creating the metadata
        self.log("### Creating the metadata")
        min_node_structure, authors = self.createMetadata(form)
        self.log(json.dumps(min_node_structure, indent=4))
        self.log(json.dumps(authors, indent=4))

        # Get a dictionary containing the first and last names
        # of an author of a Dissemin paper,
        # ready to be sent as an OSF contributor.
        def translate_authors(dissemin_authors):
            first_name = dissemin_authors['name']['first']
            last_name = dissemin_authors['name']['last']

            return {
                "data": {
                    "type": "contributors",
                    "attributes": {
                        "full_name": "{} {}".format(first_name, last_name)
                    }
                }
            }

        # Extract the OSF Storage link
        def translate_links(node_links):
            return node_links['links']['upload']

        # Creating a new depository
        self.log("### Creating a new depository")
        headers = {
            'Authorization': 'Bearer %s' % api_key,
            'Content-Type': 'application/vnd.api+json'
        }

        # Send the min. structure.
        # The response should contain the node ID.
        def create_node():
            return requests.post(self.api_url,
                                 data=json.dumps(min_node_structure),
                                 headers=headers).json()

        osf_response = create_node()
        node_id = osf_response['data']['id']

        # Get OSF Storage link
        # to later upload the Preprint PDF file.
        def get_newnode_osf_storage(node_id):
            self.storage_url = self.api_url + "{}/files/".format(node_id)
            return requests.get(self.storage_url, headers=headers).json()

        self.osf_storage_data = get_newnode_osf_storage(node_id)
        # Read the upload link straight from the API data instead of
        # stringifying a list and stripping its repr, which only worked
        # on Python 2 and with exactly one link.
        upload_links = [
            translate_links(entry) for entry in self.osf_storage_data['data']
        ]
        if not upload_links:
            self.log("No storage upload link found for the new project.")
            raise DepositError(
                __('Unable to transfer the document to OSF.'))
        osf_upload_link = upload_links[0]

        # Uploading the PDF
        self.log("### Uploading the PDF")
        upload_url = osf_upload_link + "?kind=file&name=article.pdf"
        # A PDF is binary data: open it in binary mode and make sure the
        # handle is closed once the upload is done.
        with open(pdf, 'rb') as data:
            primary_file_data = requests.put(upload_url,
                                             data=data,
                                             headers=headers).json()
        # NOTE(review): pf_path is not used further in this method; the
        # lookup still fails loudly if the upload response has another shape.
        pf_path = primary_file_data['data']['attributes']['path'][1:]

        # Add contributors
        def add_contributors():
            contrib_url = self.api_url + node_id + "/contributors/"

            for author in authors:
                contrib = translate_authors(author)
                # The parsed body is not inspected; parsing it still makes
                # a reply that is not JSON fail loudly.
                requests.post(contrib_url,
                              data=json.dumps(contrib),
                              headers=headers).json()

        add_contributors()

        # NOTE(review): nothing is submitted after this log line yet and
        # deposit_result is returned without any field being set.
        self.log("### Submitting the metadata")

        return deposit_result
Exemple #19
0
    def submit_deposit(self, pdf, form, dry_run=False):
        """
        Creates a project on OSF, uploads the PDF to its storage, adds the
        authors of the paper as contributors and sets the license of the
        project.

        :param pdf: path of the PDF file to upload
        :param form: the deposit form; ``cleaned_data['license']`` is read
                     here and the form is passed on to ``createMetadata``
        :param dry_run: accepted for interface compatibility, not used here
        :returns: the DepositResult created at the beginning; no field of it
                  is set by this method
        :raises DepositError: if no API token is configured or if OSF lists
                              no storage to upload to.
                              NOTE(review): ``log_request`` presumably raises
                              it as well on an unexpected HTTP status --
                              confirm against its definition.
        """
        if self.repository.api_key is None:
            raise DepositError(__("No OSF token provided."))

        api_key = self.repository.api_key
        license_id = form.cleaned_data['license']

        deposit_result = DepositResult()

        # Creating the metadata
        self.log("### Creating the metadata")
        min_node_structure, authors, paper_doi, pub_date = (
            self.createMetadata(form))
        self.log(json.dumps(min_node_structure, indent=4))
        self.log(json.dumps(authors, indent=4))

        # Build the full name of an author of a Dissemin paper.
        # With goal="contrib" the name is wrapped in the structure
        # sent to OSF to add a contributor.
        def translate_author(dissemin_authors, goal="optional"):
            author = "{} {}".format(dissemin_authors['name']['first'],
                                    dissemin_authors['name']['last'])

            if goal != "contrib":
                return author

            return {
                "data": {
                    "type": "contributors",
                    "attributes": {
                        "full_name": author
                    }
                }
            }

        # Extract the OSF Storage link
        def translate_links(node_links):
            return node_links['links']['upload']

        # Creating a new depository
        self.log("### Creating a new depository")
        headers = {
            'Authorization': 'Bearer %s' % api_key,
            'Content-Type': 'application/vnd.api+json'
        }

        # Send the min. structure.
        # The response should contain the node ID.
        def create_node():
            osf_response = requests.post(self.api_url,
                                         data=json.dumps(min_node_structure),
                                         headers=headers)
            self.log_request(osf_response, 201,
                             __('Unable to create a project on OSF.'))
            return osf_response.json()

        osf_response = create_node()
        node_id = osf_response['data']['id']

        # Get OSF Storage link
        # to later upload the Preprint PDF file.
        def get_newnode_osf_storage(node_id):
            self.storage_url = self.api_url + "{}/files/".format(node_id)
            osf_storage_data = requests.get(self.storage_url, headers=headers)
            self.log_request(osf_storage_data, 200,
                             __('Unable to authenticate to OSF.'))
            return osf_storage_data.json()

        self.osf_storage_data = get_newnode_osf_storage(node_id)
        # Read the upload link straight from the API data instead of
        # stringifying a list and stripping its repr, which only worked
        # on Python 2 and with exactly one link.
        upload_links = [
            translate_links(entry) for entry in self.osf_storage_data['data']
        ]
        if not upload_links:
            self.log("No storage upload link found for the new project.")
            raise DepositError(__('Unable to upload the PDF file.'))
        osf_upload_link = upload_links[0]

        # Uploading the PDF
        self.log("### Uploading the PDF")
        upload_url = osf_upload_link + "?kind=file&name=article.pdf"
        # A PDF is binary data: open it in binary mode and make sure the
        # handle is closed once the upload is done.
        with open(pdf, 'rb') as data:
            primary_file_data = requests.put(upload_url,
                                             data=data,
                                             headers=headers)
        self.log_request(primary_file_data, 201,
                         __('Unable to upload the PDF file.'))
        primary_file_data = primary_file_data.json()
        # The path comes with a leading character that is dropped here
        pf_path = primary_file_data['data']['attributes']['path'][1:]

        # Add contributors
        def add_contributors():
            contrib_url = self.api_url + node_id + "/contributors/"

            for author in authors:
                contrib = translate_author(author, "contrib")
                contrib_response = requests.post(contrib_url,
                                                 data=json.dumps(contrib),
                                                 headers=headers)
                self.log_request(contrib_response, 201,
                                 __('Unable to add contributors.'))

        add_contributors()

        # Set the license on the project
        def create_license():
            node_url = self.api_url + node_id + "/"

            # Only the "no license" choice carries the year and the
            # copyright holders
            if license_id == NO_LICENSE_ID:
                node_license = {
                    "year": pub_date,
                    "copyright_holders": [
                        translate_author(author) for author in authors
                    ]
                }
            else:
                node_license = {}

            license_structure = {
                "data": {
                    "type": "nodes",
                    "id": node_id,
                    "attributes": {
                        "node_license": node_license
                    },
                    "relationships": {
                        "license": {
                            "data": {
                                "type": "licenses",
                                "id": license_id
                            }
                        }
                    }
                }
            }

            license_req = requests.patch(node_url,
                                         data=json.dumps(license_structure),
                                         headers=headers)
            self.log_request(license_req, 200, __('Unable to update license.'))

            # Updating License
            self.log("### Updating License")
            self.log(str(license_req.status_code))
            self.log(license_req.text)

        create_license()

        # Build the minimal structure describing the preprint.
        # NOTE(review): this helper is never called and nothing is sent to
        # OSF for the preprint yet; deposit_result is returned unpopulated.
        def create_preprint():
            license_url = "https://api.osf.io/v2/licenses/"
            license_url = license_url + "{}".format(license_id)
            min_preprint_structure = {
                "data": {
                    "attributes": {
                        "doi": paper_doi
                    },
                    "relationships": {
                        "node": {
                            "data": {
                                "type": "nodes",
                                "id": node_id
                            }
                        },
                        "primary_file": {
                            "data": {
                                "type": "primary_files",
                                "id": pf_path
                            }
                        },
                        "license": {
                            "links": {
                                "related": {
                                    "href": license_url,
                                    "meta": {}
                                }
                            }
                        },
                        "provider": {
                            "data": {
                                "type": "providers",
                                "id": "osf"
                            }
                        }
                    }
                }
            }
            return min_preprint_structure

        return deposit_result