def list_statements():
    """Render the list of license statements, processing delete requests first.

    On POST, every form key starting with ``delete_statement`` is expected to
    look like ``delete_statement-<id>``; the statement with that id is pulled
    and, if it exists, deleted.  The listing is then queried (sorted by
    ``license_type`` ascending) and rendered with ``statements.html``.

    Returns:
        The rendered ``statements.html`` template.
    """
    if request.method == "POST":
        deleted_any = False
        for key in request.form:
            if not key.startswith("delete_statement"):
                continue
            parts = key.split("-")
            if len(parts) < 2:
                # Malformed key with no "-<id>" suffix: skip it instead of
                # failing the whole request with an IndexError.
                continue
            ls = LicenseStatement.pull(parts[1])
            if ls:
                ls.delete()
                deleted_any = True
        if deleted_any:
            # HACK: wait so the deletion is visible to the query below.
            # One pause after all deletions have been issued is enough.
            sleep(1.5)

    # Query once, after any deletions, rather than querying up front and
    # then again after a POST.
    statements = LicenseStatement.all(sort=[{"license_type": {"order": "asc"}}])
    return render_template("statements.html", statements=statements, license_info=LICENSES)
def publisher_edit(publisher_id=None):
    """Show the publisher form and, on a valid POST, save the publisher.

    The publisher identified by ``publisher_id`` is pulled and used to
    populate the form.  When a POST validates, the publisher (a new one if
    none was found) receives the form's name, journal URLs and licenses; each
    license entry is additionally saved as its own ``LicenseStatement``
    before the publisher is saved.

    Returns:
        A redirect to this view for the saved publisher after a valid POST,
        otherwise the rendered ``publisher.html`` template.
    """
    publisher = Publisher.pull(publisher_id)
    form = PublisherLicenseForm(request.form, publisher)

    # Anything other than a valid submission just (re)displays the form.
    is_submission = request.method == 'POST'
    if not (is_submission and form.validate()):
        return render_template('publisher.html', form=form)

    if not publisher:
        publisher = Publisher()

    publisher.publisher_name = form.publisher_name.data
    publisher.journal_urls = form.journal_urls.data
    publisher.licenses = form.licenses.data

    # Every license on the publisher is also stored as a standalone statement.
    for license_fields in publisher.licenses:
        statement = LicenseStatement(**license_fields)
        statement.save()

    publisher.save()
    target = url_for('.publisher_edit', publisher_id=publisher.id)
    return redirect(target)
def statement_edit(statement_id=None):
    """Show the license statement form and, on a valid POST, save it.

    The statement identified by ``statement_id`` is pulled and used to
    populate the form.  When a POST validates, the statement (a new one if
    none was found) receives the form's statement text and license type;
    ``version`` and ``example_doi`` are only written when the form supplies a
    non-empty value for them.

    Returns:
        A redirect to the statement list (anchored at the saved statement's
        ``edit_id``) after a valid POST, otherwise the rendered
        ``statement.html`` template.
    """
    statement = LicenseStatement.pull(statement_id)
    form = LicenseForm(request.form, statement)

    # Anything other than a valid submission just (re)displays the form.
    is_submission = request.method == "POST"
    if not (is_submission and form.validate()):
        return render_template("statement.html", form=form)

    if not statement:
        statement = LicenseStatement()

    statement.license_statement = form.license_statement.data
    statement.license_type = form.license_type.data

    # Optional fields: leave the stored value alone when the form is blank.
    for field_name in ("version", "example_doi"):
        submitted_value = getattr(form, field_name).data
        if submitted_value:
            setattr(statement, field_name, submitted_value)

    statement.save()
    sleep(1.5)  # ugly hack, make sure statement is saved before showing to user
    return redirect(url_for(".list_statements", _anchor=statement.edit_id))
def license_detect(self, record):
    """Try to detect a license for ``record`` using publisher configurations.

    Steps:
      1. Load all publisher configs and index them by URL and by id.
      2. Collect the configs whose URL is a prefix of one of the record's
         cleaned provider URLs, longest config URL first.
      3. Fetch the content of every provider URL once.
      4. For each matching config, try its license statements against each
         fetched page via ``simple_extract``; stop at the first config that
         adds a license to ``record.license``.
      5. Only if no config matched at all, try the flat list of every known
         license statement instead.

    Args:
        record: the record being processed; this method reads
            ``record.provider_urls`` and ``record.license``, and
            ``simple_extract`` is given the record to work on.

    Returns:
        A ``(handler_name, handler_version)`` tuple.  ``handler_name`` is the
        successful config's ``publisher_name``, this plugin's ``_short_name``
        when the flat list succeeded, or ``'oag'`` when nothing was found.
    """
    # always get them in the same order relative to each other
    all_configs = Publisher.all(sort=[{'publisher_name': 'asc'}])
    url_index = self._generate_publisher_config_index_by_url(all_configs)
    # longest url-s first, so more specific configs are tried first
    url_index = OrderedDict(
        sorted(url_index.items(), key=lambda x: len(x[0]), reverse=True)
    )
    id_index = self._generate_publisher_config_index_by_id(all_configs)

    # get all the configs that match
    # future: use tries to prefix match them to the incoming URL
    work_on = self.clean_urls(record.provider_urls, strip_leading_www=True)
    matching_configs = []
    seen_config_ids = set()
    for config_url, config_id in url_index.items():
        if config_id in seen_config_ids:
            # Already queued via a longer URL; retrying the same config
            # against the same pages would only repeat the work.
            continue
        if any(incoming_url.startswith(config_url) for incoming_url in work_on):
            seen_config_ids.add(config_id)
            matching_configs.append(id_index[config_id])

    # prefetch the content, we'll be reusing it a lot
    urls_contents = {}
    for incoming_url in record.provider_urls:
        unused_response, urls_contents[incoming_url], unused_content_length = \
            util.http_stream_get(incoming_url)

    current_licenses_count = len(record.license)

    def try_statements(lic_statements, **extract_kwargs):
        # Run simple_extract on each fetched page; report True as soon as
        # the record has gained a license, without trying further URLs.
        for incoming_url, content in urls_contents.items():
            self.simple_extract(
                lic_statements, record, incoming_url,
                first_match=True, content=content, **extract_kwargs
            )
            if len(record.license) > current_licenses_count:
                return True
        return False

    successful_config = None
    for config in matching_configs:
        # With reverse=True: versions in reverse alphabetical order, blank
        # versions last, and longest statement first within a version.
        # A missing/None version is normalised to '' so it sorts together
        # with the other blank versions.
        config_licenses = sorted(
            config['licenses'],
            key=lambda lic: (
                lic.get('version') or '',
                len(lic['license_statement']),
            ),
            reverse=True
        )
        lic_statements = [
            {
                lic['license_statement']: {
                    'type': lic['license_type'],
                    # .get(): a license without a version must not raise
                    # KeyError; mirrors the flat-list branch below.
                    'version': lic.get('version', ''),
                }
            }
            for lic in config_licenses
        ]
        if try_statements(lic_statements, handler=config.publisher_name):
            # found it! stop trying configs and record which one worked
            successful_config = config
            break

    # if no config exists which can match the license, then try the flat list
    # do not try the flat list of statements if a matching config has been found
    # this keeps these "virtual" plugins, i.e. the configs, consistent with how
    # the rest of the system operates
    flat_license_list_success = False
    if not matching_configs:
        # Statements without a version (missing, None or '') go last.
        # NOTE(review): within each group this sorts SHORTEST statement
        # first, whereas the per-config path above tries the longest first.
        # Left unchanged here; confirm which order is intended.
        all_statements = sorted(
            LicenseStatement.all(),
            key=lambda lic: (
                not lic.get('version'),
                len(lic['license_statement']),
            )
        )
        lic_statements = [
            {
                lic['license_statement']: {
                    'type': lic['license_type'],
                    'version': lic.get('version', ''),
                }
            }
            for lic in all_statements
        ]
        # no handler passed: simple_extract uses its default handler
        flat_license_list_success = try_statements(lic_statements)

    if successful_config is not None:
        return successful_config.publisher_name, self.__version__
    if flat_license_list_success:
        return self._short_name, self.__version__

    # in case everything fails, return 'oag' as the handler to
    # be consistent with the failure handler in the workflow module
    # so that way, all "completely failed" licenses will have 'oag'
    # on them, except that the GSM ones will have the GSM's current
    # version
    return 'oag', self.__version__