def create(self, should_exist=False):
    """Load an existing repository record, or write a new one.

    When should_exist is True and the metadata file is missing,
    self.parser.uid is treated as a prefix and matched against entries
    under self.data_base. Exactly one match is required, and the uid is
    rewritten to that match (database prefix and ".json" removed). The data,
    taxonomy, and criteria are then loaded.

    When should_exist is False a basic record is built from the parser
    and saved. In both cases the parser directory is created if missing.

    Arguments:
     - should_exist (bool) : the repository is expected to already exist

    Raises:
     - RepoNotFoundError : nothing in the database matches the uid prefix
     - MultipleReposExistError : more than one entry matches the uid prefix
    """
    if should_exist:
        if not os.path.exists(self.filename):

            # Might be provided prefix
            contenders = glob("%s*" % os.path.join(self.data_base, self.parser.uid))
            if not contenders:
                raise RepoNotFoundError(self.parser.uid)
            if len(contenders) > 1:
                raise MultipleReposExistError(self.parser.uid)

            # The database path is escaped so that regex metacharacters in
            # it (e.g. "+" or ".") are matched literally; otherwise the
            # prefix would not be stripped and the uid would stay a full path.
            self.parser.uid = re.sub(
                "(%s/|[.]json)" % re.escape(self.data_base),
                "",
                contenders[0],
            )

        self.data = self.load()
        self.taxonomy = self.load_taxonomy()
        self.criteria = self.load_criteria()

    if not os.path.exists(self.parser_dir):
        mkdir_p(self.parser_dir)

    # If it's the first time saving, create basic file
    if not should_exist:
        self.data = {
            "parser": self.parser.name,
            "uid": self.parser.uid,
            "url": self.parser.get_url(),
            "data": self.parser.export(),
        }
        self.save()
def create_database(self, config_dir):
    """Set up the "database" folder inside an existing config directory.

    The absolute database path is stored on self.data_base before the
    parent directory is validated, and the folder is created only when it
    is not already present.

    Arguments:
     - config_dir (str) : the parent directory, which must already exist

    Raises:
     - DirectoryNotFoundError : config_dir does not exist
    """
    database_path = os.path.join(config_dir, "database")
    self.data_base = os.path.abspath(database_path)

    parent_missing = not os.path.exists(config_dir)
    if parent_missing:
        raise DirectoryNotFoundError(
            config_dir, "must exist to create database there"
        )

    # Nothing more to do when the database folder is already there
    if os.path.exists(self.data_base):
        return
    mkdir_p(self.data_base)
def export_web_static(export_dir, base_url, client, force=False):
    """Export a running web interface to a folder.

    If the folder exists, the user must use force. This should be run via:
    rse export --type static-web [export_dir]

    If the user manually starts the server, the user needs to do:
    export RSE_DISABLE_ANNOTATE=True
    before the server is started to disable the annotation interface button.
    This will be fixed in a future PR to have an interface that submits an
    issue to do an annotation, but this needs to be developed first.

    Every page and API endpoint is requested from the server and written
    under export_dir; responses other than 200 are reported and skipped.
    A data.json summary of all repositories is written last.

    Arguments:
     - export_dir (str)      : the path to an export directory
     - base_url (str)        : the base url of the server, including port
     - client (Encyclopedia) : the encyclopedia to use
     - force (bool)          : if directory exists, overwrite
    """
    print(f"Starting export for {base_url}")
    time.sleep(2)

    # Ensure that the server is running. Only reachability is checked here;
    # non-200 responses are reported per url when the pages are fetched.
    try:
        requests.head(base_url)
    except requests.exceptions.RequestException:
        bot.info(
            "Please export after the server is running: export --type static-web [export_dir]"
        )
        return

    # Output directory cannot exist unless force
    if os.path.exists(export_dir) and not force:
        sys.exit(f"{export_dir} exists, use --force to overwrite.")

    # Create export directory (and any missing parents) if it doesn't exist
    if not os.path.exists(export_dir):
        os.makedirs(export_dir)

    # Copy static files
    static_files = os.path.join(export_dir, "static")
    if not os.path.exists(static_files):
        shutil.copytree(os.path.join(here, "static"), static_files)

    # Every url below shares this root
    prefix = "%s%s" % (base_url, RSE_URL_PREFIX)

    # Prepare urls (and filepath relative to export_dir) for export
    urls = {base_url: "index.html"}

    # Create static data export
    data = []

    # Add repos and static annotation
    for entry in client.list():
        repo = client.get(entry[0])
        repo_path = os.path.join("repository", repo.uid)
        data.append(
            {
                "uid": repo.uid,
                "url": repo.url,
                "rel": "%s%s" % (RSE_URL_PREFIX, repo_path),
                "avatar": repo.avatar,
                "description": repo.description,
            }
        )

        # Currently don't link to repository page, so it is not exported

        # Static annotation endpoints
        for annotation_type in ["criteria", "taxonomy"]:
            annotate_url = "%s%s/annotate-%s" % (prefix, repo_path, annotation_type)
            urls[annotate_url] = os.path.join(
                repo_path, "annotate-%s" % annotation_type, "index.html"
            )

        # Repository API endpoints
        urls["%sapi/repos/%s" % (prefix, repo.uid)] = os.path.join(
            "api", "repos", repo.uid, "index.json"
        )

    # Add API endpoints
    urls["%sapi" % prefix] = os.path.join("api", "index.json")
    urls["%sapi/repos" % prefix] = os.path.join("api", "repos", "index.json")
    for parser in ["github", "gitlab"]:
        urls["%sapi/repos/parser/%s" % (prefix, parser)] = os.path.join(
            "api", "repos", "parser", parser, "index.json"
        )
    urls["%sapi/taxonomy" % prefix] = os.path.join("api", "taxonomy", "index.json")
    urls["%sapi/criteria" % prefix] = os.path.join("api", "criteria", "index.json")

    # Add search, criteria, and taxonomy
    for term in ["search", "criteria", "taxonomy"]:
        urls["%s%s" % (prefix, term)] = os.path.join(term, "index.html")

    for url, relative_path in urls.items():

        # Resolve against the export directory before touching the
        # filesystem, so an unrelated file at the same relative path in the
        # current working directory cannot cause a page to be skipped.
        outfile = os.path.join(export_dir, relative_path)

        # Create nested output folder, if doesn't exist
        out_dir = os.path.dirname(outfile)
        if not os.path.exists(out_dir):
            mkdir_p(out_dir)

        # Url might have a prefix
        response = requests.get(url, headers={"User-Agent": get_user_agent()})
        if response.status_code == 200:
            write_file(outfile, response.text)
        else:
            print(f"Issue parsing {url}")

    print("Generating data export")
    write_json(data, os.path.join(export_dir, "data.json"))
    print("Export is complete!")