def err(self):
    """Return the captured error stream.

    Looks at the file backing ``self.stderr`` and hands its path to
    ``read_file`` when that file is present on disk.

    Returns: the result of ``read_file`` for the stderr file, or an empty
    string when the file does not exist.
    """
    stderr_path = self.stderr.name
    return read_file(stderr_path) if os.path.exists(stderr_path) else ""
def out(self):
    """Return the captured output stream.

    Looks at the file backing ``self.stdout`` and hands its path to
    ``read_file`` when that file is present on disk.

    Returns: the result of ``read_file`` for the stdout file, or an empty
    string when the file does not exist.
    """
    stdout_path = self.stdout.name
    return read_file(stdout_path) if os.path.exists(stdout_path) else ""
def _import_annotation(self, input_file, username, stop_line="## Criteria"): """A general helper (private) function to import an annotation, meaning we parse a repository and return additional lines for parsing. """ if not username or not input_file: raise RuntimeError( "A username and input file are required to import annotation criteria." ) if not os.path.exists(input_file): raise FileNotFoundError(input_file) lines = read_file(input_file) line = lines.pop(0) # Find the repository name while stop_line not in line: match = re.search(repository_regex, line) if match: break line = lines.pop(0) # Retrieve the match if not match: raise RuntimeError(f"repository pattern not found in {input_file}") reponame = match.group() parser = get_parser(reponame, config=self.config) repo = self.get(parser.uid) return repo, lines
def bulk_add(self, filename):
    """Add every repository listed in a file, one identifier per line.

    Each line is stripped of surrounding whitespace and passed to
    ``self.add`` with ``quiet=True``. Every result is kept in order,
    whatever its value.

    Returns (list) : the results of ``self.add``, or an empty list when
    the file does not exist.
    """
    if not os.path.exists(filename):
        return []
    return [self.add(entry.strip(), quiet=True) for entry in read_file(filename)]
def bulk_update(self, filename, rewrite=False):
    """Update every repository listed in a file, one identifier per line.

    Each line is stripped of surrounding whitespace and passed to
    ``self.update`` together with ``rewrite``. Identifiers that raise
    RepoNotFoundError are skipped so the rest of the list is still
    processed.

    Returns (list) : the results of ``self.update`` for the identifiers
    that were found, or an empty list when the file does not exist.
    """
    updated = []
    if not os.path.exists(filename):
        return updated

    for entry in read_file(filename):
        uid = entry.strip()
        try:
            result = self.update(uid, rewrite=rewrite)
        except RepoNotFoundError:
            # Best effort: an unknown repository does not stop the batch.
            continue
        else:
            updated.append(result)
    return updated
def load_criteria(self):
    """Load the criteria files found in ``self.parser_dir``, if any.

    Every file matching ``criteria*.tsv`` is read. The criteria uid is the
    file's base name with ``criteria-`` and ``.tsv`` removed. Each
    non-blank row must hold exactly two tab separated fields, a username
    and a response.

    Returns (dict) : criteria uid -> {username: response}; empty when no
    criteria files are present.
    """
    loaded = {}
    pattern = f"{self.parser_dir}/criteria*.tsv"
    for path in glob(pattern):
        basename = os.path.basename(path)
        uid = basename.replace("criteria-", "").replace(".tsv", "")
        rows = read_file(path)
        responses = loaded.setdefault(uid, {})
        for raw in rows:
            entry = raw.strip()
            if entry:
                username, response = entry.split("\t")
                responses[username] = response
    return loaded
def load_taxonomy(self):
    """Load taxonomy annotations from ``self.parser_dir``, if present.

    The ``taxonomy.tsv`` file is tab separated, one row per user: a
    username followed by a comma separated list of category unique ids.
    Each non-blank row must hold exactly those two fields. This keeps a
    record of who has categorized what.

    Returns (dict) : username -> list of category unique ids (each
    stripped of whitespace); empty when the file does not exist.
    """
    annotations = {}
    path = os.path.join(self.parser_dir, "taxonomy.tsv")
    if not os.path.exists(path):
        return annotations

    for raw in read_file(path):
        entry = raw.strip()
        if entry:
            username, uids = entry.split("\t")
            annotations[username] = [uid.strip() for uid in uids.split(",")]
    return annotations