def license_detect(self, record):

        relevant_publishers = []
        for bu in self._base_urls:
            cburl = self.clean_url(bu)
            relevant_publishers += Publisher.find_by_journal_url('http://' + cburl)

        lic_statements = []
        for pub in relevant_publishers:
            for l in pub['licenses']:
                lic_statement = {}
                lic_statement[l['license_statement']] = {'type': l['license_type'], 'version': l.get('version', '')}
                lic_statements.append(lic_statement)

        if not lic_statements:
            lic_statements = self._license_mappings

        for url in record.provider_urls:
            if self.supports_base_url(url):
                # try the fulltext first
                if not url.endswith('/fulltext.html'):
                    url_with_suffix = url + '/fulltext.html'
                else:
                    url_with_suffix = url
                self.simple_extract(lic_statements, record, url_with_suffix)

                if not record.has_license() and not record.was_licensed():
                    # if not successful, try the URL as-is
                    self.simple_extract(lic_statements, record, url)

        return (self._short_name, self.__version__)
    def get_description(self, plugin_name):
        """
        Return a plugin.PluginDescription object that describes the plugin configuration
        identified by the given name
        """

        p = Publisher.q2obj(q='publisher_name:' + plugin_name.lower())
        if not p:
            # shouldn't really happen, but this should give an
            # indication if it does
            raise ValueError('Unsupported plugin name.')
        p = p[0]

        license_support = "The following license statements are recognised:\n\n"
        statement_index = []
        for lic in p.data['licenses']:
            statement = lic['license_statement']
            statement_index.append(statement)
            ltype = lic['license_type']
            version = lic['version']
            license_support += ltype + " " + version + ":\n" + statement + "\n\n"

        return plugin.PluginDescription(
            name=plugin_name,
            version=self.__version__,
            description="A supported publisher (registered via the register a publisher form).",
            provider_support="\n".join(p.data['journal_urls']),
            license_support=license_support,
            edit_id=p['id']
        )
 def get_names(self):
     """
     Return the list of names of configurations supported by the GSM
     """
     configs = Publisher.all(sort=[{"publisher_name.exact" : {"order" : "asc"}}])
     names = [p['publisher_name'] for p in configs]
     return names
 def has_name(self, plugin_name):
     """
     Return True if there is a configuration for the given plugin name
     """
     r = Publisher.query(q='publisher_name:' + plugin_name.lower())
     if r['hits']['total'] > 0:
         return True
     return False
Esempio n. 5
0
def publisher_edit(publisher_id=None):
    p = Publisher.pull(publisher_id)
    form = PublisherLicenseForm(request.form, p)

    if request.method == 'POST' and form.validate():
        if not p:
            p = Publisher()
        p.publisher_name = form.publisher_name.data
        p.journal_urls = form.journal_urls.data
        p.licenses = form.licenses.data
        for l in p.licenses:
            new_ls = LicenseStatement(**l)
            new_ls.save()
        p.save()
        return redirect(url_for('.publisher_edit', publisher_id=p.id))
        
    return render_template('publisher.html', form=form)
    def license_detect(self, record):

        relevant_publishers = []
        for bu in self._base_urls:
            cburl = self.clean_url(bu)
            relevant_publishers += Publisher.find_by_journal_url('http://' + cburl)

        lic_statements = []
        for pub in relevant_publishers:
            for l in pub['licenses']:
                lic_statement = {}
                lic_statement[l['license_statement']] = {'type': l['license_type'], 'version': l.get('version', '')}
                lic_statements.append(lic_statement)

        if not lic_statements:
            lic_statements = self._license_mappings

        for url in record.provider_urls:
            if self.supports_base_url(url):
                # TODO refactor self.simple_extract into several pieces
                # a downloader, a matcher, and a f() that records the license info
                # so the first two (and perhaps a general version of the third)
                # can be used here instead of this plugin having to do
                # all the work itself.
                r, content, content_length = http_stream_get(url)

                extra_provenance = {
                    "accepted_author_manuscript": False
                }

                for amm in self._author_manuscript_mappings:
                    statement = amm.keys()[0].strip()
                    if statement in content:
                        extra_provenance = amm[statement]
                        break

                self.simple_extract(lic_statements, record, url, content=content,
                                    extra_provenance=extra_provenance)


        return self._short_name, self.__version__
    def license_detect(self, record):
        # get all the URL-s from ES into a list
        #     need some way of getting facets from the DAO, ideally
        #     directly in list form as well as the raw form
        all_configs = Publisher.all(sort=[{'publisher_name': 'asc'}])  # always get them in the same order relative to each other
        url_index = self._generate_publisher_config_index_by_url(all_configs)
        url_index = OrderedDict(sorted(url_index.iteritems(), key=lambda x: len(x[0]), reverse=True))  # longest url-s first
        id_index = self._generate_publisher_config_index_by_id(all_configs)

        # get all the configs that match
        matching_configs = []
        work_on = record.provider_urls
        work_on = self.clean_urls(work_on, strip_leading_www=True)

        for config_url, config_id in url_index.items():
            for incoming_url in work_on:
                if incoming_url.startswith(config_url):
                    matching_configs.append(id_index[config_id])
        # future:
        # use tries to prefix match them to the incoming URL
        #     if the results of this could be ordered by URL length that
        #     would be great, or stop at first match option

        urls_contents = {}
        # prefetch the content, we'll be reusing it a lot
        for incoming_url in record.provider_urls:
            unused_response, urls_contents[incoming_url], unused_content_length = util.http_stream_get(incoming_url)

        # order their license statements by whether they have a version,
        # and then by length

        successful_config = None
        current_licenses_count = len(record.license)
        new_licenses_count = 0
        for config in matching_configs:
            matching_config_licenses = config['licenses']

            matching_config_licenses = sorted(
                matching_config_licenses,
                key=lambda lic: (
                    lic.get('version'),  # with reverse=True, this will actually sort licenses in REVERSE ALPHABETICAL order of their versions, blank versions go last
                    len(lic['license_statement'])  # longest first with reverse=True
                ),
                reverse=True
            )

            # try matching like that
            lic_statements = []
            for l in matching_config_licenses:
                lic_statement = {}
                lic_statement[l['license_statement']] = {'type': l['license_type'], 'version': l['version']}
                lic_statements.append(lic_statement)

            for incoming_url, content in urls_contents.iteritems():
                self.simple_extract(lic_statements, record, incoming_url, first_match=True, content=content, handler=config.publisher_name)
                new_licenses_count = len(record.license)
                # if we find a license, stop trying the different URL-s
                if new_licenses_count > current_licenses_count:
                    break
            # if we find a license, stop trying the configs and record which config found it
            if new_licenses_count > current_licenses_count:
                # found it!
                successful_config = config
                break

        # if no config exists which can match the license, then try the flat list
        # do not try the flat list of statements if a matching config has been found
        # this keeps these "virtual" plugins, i.e. the configs, consistent with how
        # the rest of the system operates
        lic_statements = []
        flat_license_list_success = False
        if len(matching_configs) <= 0:
            all_statements = LicenseStatement.all()
            all_statements = sorted(
                all_statements,
                key=lambda lic: (
                    lic.get('version', '') == '',  # does it NOT have a version? last!
                    # see http://stackoverflow.com/questions/9386501/sorting-in-python-and-empty-strings

                    len(lic['license_statement'])  # length of license statement
                )
            )

            for l in all_statements:
                lic_statement = {}
                lic_statement[l['license_statement']] = {'type': l['license_type'], 'version': l.get('version', '')}
                lic_statements.append(lic_statement)

            for incoming_url, content in urls_contents.iteritems():
                self.simple_extract(lic_statements, record, incoming_url, first_match=True, content=content)  # default handler - the plugin's name
                new_licenses_count = len(record.license)
                # if we find a license, stop trying the different URL-s
                if new_licenses_count > current_licenses_count:
                    break

            if new_licenses_count > current_licenses_count:
            # one of the flat license index did it
                flat_license_list_success = True

        if successful_config:
            return successful_config.publisher_name, self.__version__
        elif flat_license_list_success:
            return self._short_name, self.__version__

        # in case everything fails, return 'oag' as the handler to
        # be consistent with the failure handler in the workflow module
        # so that way, all "completely failed" licenses will have 'oag'
        # on them, except that the GSM ones will have the GSM's current
        # version
        return 'oag', self.__version__