def is_this_name_OK_for_database(variable_name, content, allowed_chars=allowed_chars.replace('.', '')):
    """Raise ConfigError unless `content` is an acceptable name for a database.

    variable_name -- label of the thing being checked; it is only used to
                     build the error messages.
    content       -- the candidate name itself.
    allowed_chars -- characters `content` may be made of. The default is the
                     module-level `allowed_chars` with '.' removed, evaluated
                     once when the function is defined.

    Raises ConfigError when `content` is empty, when its first character is
    in `digits`, or when it contains a character outside `allowed_chars`.
    Returns None when the name passes every check.
    """
    if not content:
        # Without this guard `content[0]` below fails with an IndexError (or a
        # TypeError for None) rather than a user-facing ConfigError.
        raise ConfigError("Sorry, '%s' can't be empty. Please specify a name that starts "
                          "with an ASCII letter." % variable_name)

    if content[0] in digits:
        # Adjacent literals are joined at compile time, so the message carries
        # no stray backslashes or source indentation.
        raise ConfigError("Sorry, '%s' can't start with a digit. Long story. Please specify "
                          "a sample name that starts with an ASCII letter." % variable_name)

    if any(char not in allowed_chars for char in content):
        raise ConfigError("Well, '%s' parameter contains characters that anvi'o does not "
                          "like. Please limit the characters to ASCII letters, digits, the "
                          "underscore and dash characters ('_', '-')." % variable_name)
def get_HMM_sources_dictionary(source_dirs=[]):
    """Describe the HMM search databases found in `source_dirs`.

    source_dirs -- list of directory paths, one per search database. A single
                   trailing '/' on a path is ignored. The list default is kept
                   for interface compatibility; it is only read, never mutated.

    Every directory name must be at least three characters long, must consist
    only of characters from `allowed_chars` minus '.' and '-', and must not
    start with an underscore or a digit. Every directory must contain
    'reference.txt', 'kind.txt', 'genes.txt' and 'genes.hmm.gz'. The first
    line of 'kind.txt' has to satisfy the same naming rule as the directory.

    Returns a dict keyed by directory base name. Each value is a dict with:
        "ref"   -- first line of 'reference.txt', stripped
        "kind"  -- first line of 'kind.txt', stripped
        "genes" -- list of the keys of the dictionary that
                   get_TAB_delimited_file_as_dictionary builds from 'genes.txt'
        "model" -- path to 'genes.hmm.gz' inside the directory

    Raises ConfigError when `source_dirs` is not a list, when a directory name
    or a kind is not acceptable, or when a required file is missing.
    """
    if not isinstance(source_dirs, list):
        raise ConfigError("source_dirs parameter must be a list (get_HMM_sources_dictionary).")

    sources = {}
    if not source_dirs:
        # Nothing to inspect, so there is nothing to validate either.
        return sources

    proper_chars = allowed_chars.replace(".", "").replace("-", "")
    required_files = ("reference.txt", "kind.txt", "genes.txt", "genes.hmm.gz")

    def is_proper(word):
        # The length test comes first so `word[0]` is never evaluated on an
        # empty string.
        return (len(word) >= 3
                and word[0] not in "_0123456789"
                and all(char in proper_chars for char in word))

    def first_line(path):
        # `with` closes the handle right away. An empty file yields '' here
        # instead of an IndexError, so an empty 'kind.txt' ends up in the
        # regular ConfigError below.
        with open(path) as handle:
            return handle.readline().strip()

    for source in source_dirs:
        if source.endswith("/"):
            source = source[:-1]

        name = os.path.basename(source)

        if not is_proper(name):
            raise ConfigError("One of the search database directories ('%s') contains "
                              "characters in its name anvio does not like. Directory names "
                              "should be at least three characters long and must not "
                              "contain any characters but ASCII letters, digits and "
                              "underscore" % name)

        for required in required_files:
            if not os.path.exists(os.path.join(source, required)):
                raise ConfigError("Each search database directory must contain following "
                                  "files: 'kind.txt', 'reference.txt', 'genes.txt', and "
                                  "'genes.hmm.gz'. %s does not seem to be a proper "
                                  "source." % name)

        ref = first_line(os.path.join(source, "reference.txt"))
        kind = first_line(os.path.join(source, "kind.txt"))

        if not is_proper(kind):
            raise ConfigError("'kind.txt' defines the kind of search this database offers. "
                              "This file must contain a single word that is at least three "
                              "characters long, and must not contain any characters but "
                              "ASCII letters, digits, and underscore. Here are some nice "
                              "examples: 'singlecopy', or 'pathogenicity', or "
                              "'noras_selection'. But yours is '%s'." % kind)

        genes = get_TAB_delimited_file_as_dictionary(
            os.path.join(source, "genes.txt"),
            column_names=["gene", "accession", "hmmsource"],
        )

        sources[name] = {
            "ref": ref,
            "kind": kind,
            # list() keeps this a real list on Python 3 as well, where
            # dict.keys() is a view.
            "genes": list(genes.keys()),
            "model": os.path.join(source, "genes.hmm.gz"),
        }

    return sources
def check_sample_id(sample_id):
    """Validate a sample name, raising ConfigError when it is not acceptable.

    sample_id -- the sample name to check. A falsy value (None or an empty
                 string) is not checked and the function returns right away.

    Raises ConfigError when the first character of `sample_id` is in `digits`,
    or when `sample_id` contains a character outside `allowed_chars` minus '-'
    and '.'. Returns None otherwise.
    """
    if not sample_id:
        return

    if sample_id[0] in digits:
        # Adjacent literals are joined at compile time, so the message carries
        # no stray backslashes or source indentation.
        raise ConfigError("Sample names can't start with digits. Long story. Please specify "
                          "a sample name that starts with an ASCII letter (you can use '-s' "
                          "parameter for that).")

    allowed_chars_for_samples = allowed_chars.replace('-', '').replace('.', '')

    if any(char not in allowed_chars_for_samples for char in sample_id):
        raise ConfigError("Sample name ('%s') contains characters that anvio does not like. "
                          "Please limit the characters that make up the project name to "
                          "ASCII letters, digits, and the underscore character "
                          "('_')." % sample_id)
def check_sample_id(sample_id):
    """Validate a sample name, raising ConfigError when it is not acceptable.

    sample_id -- the sample name to check. A falsy value (None or an empty
                 string) is not checked and the function returns right away.

    Raises ConfigError when the first character of `sample_id` is in `digits`,
    or when `sample_id` contains a character outside `allowed_chars` minus '-'
    and '.'. Returns None otherwise.
    """
    if not sample_id:
        return

    if sample_id[0] in digits:
        # Adjacent literals are joined at compile time, so the message carries
        # no stray backslashes or source indentation.
        raise ConfigError("Sample names can't start with digits. Long story. Please specify "
                          "a sample name that starts with an ASCII letter (you may want to "
                          "check '-s' parameter to set a sample name if your client permits "
                          "(otherwise you are going to have to edit your input files)).")

    allowed_chars_for_samples = allowed_chars.replace('-', '').replace('.', '')

    if any(char not in allowed_chars_for_samples for char in sample_id):
        raise ConfigError("Sample name ('%s') contains characters that anvio does not like. "
                          "Please limit the characters that make up the project name to "
                          "ASCII letters, digits, and the underscore character "
                          "('_')." % sample_id)
def get_HMM_sources_dictionary(source_dirs=[]):
    """Describe the HMM search databases found in `source_dirs`.

    source_dirs -- list of directory paths, one per search database. A single
                   trailing '/' on a path is ignored. The list default is kept
                   for interface compatibility; it is only read, never mutated.

    Every directory name must be at least three characters long, must consist
    only of characters from `allowed_chars` minus '.' and '-', and must not
    start with an underscore or a digit. Every directory must contain
    'reference.txt', 'kind.txt', 'genes.txt' and 'genes.hmm.gz'. The first
    line of 'kind.txt' has to satisfy the same naming rule as the directory.

    Returns a dict keyed by directory base name. Each value is a dict with:
        'ref'   -- first line of 'reference.txt', stripped
        'kind'  -- first line of 'kind.txt', stripped
        'genes' -- list of the keys of the dictionary that
                   get_TAB_delimited_file_as_dictionary builds from 'genes.txt'
        'model' -- path to 'genes.hmm.gz' inside the directory

    Raises ConfigError when `source_dirs` is not a list, when a directory name
    or a kind is not acceptable, or when a required file is missing.
    """
    if not isinstance(source_dirs, list):
        raise ConfigError("source_dirs parameter must be a list (get_HMM_sources_dictionary).")

    sources = {}
    if not source_dirs:
        # Nothing to inspect, so there is nothing to validate either.
        return sources

    allowed_chars_for_proper_sources = allowed_chars.replace('.', '').replace('-', '')
    required_files = ('reference.txt', 'kind.txt', 'genes.txt', 'genes.hmm.gz')

    def is_proper(word):
        # The length test comes first so `word[0]` is never evaluated on an
        # empty string.
        return (len(word) >= 3
                and word[0] not in '_0123456789'
                and all(char in allowed_chars_for_proper_sources for char in word))

    def read_first_line(path):
        # `with` closes the handle right away. An empty file yields '' here
        # instead of an IndexError, so an empty 'kind.txt' ends up in the
        # regular ConfigError below.
        with open(path) as handle:
            return handle.readline().strip()

    for source in source_dirs:
        if source.endswith('/'):
            source = source[:-1]

        source_name = os.path.basename(source)

        if not is_proper(source_name):
            raise ConfigError("One of the search database directories ('%s') contains "
                              "characters in its name anvio does not like. Directory names "
                              "should be at least three characters long and must not "
                              "contain any characters but ASCII letters, digits and "
                              "underscore" % source_name)

        for file_name in required_files:
            if not os.path.exists(os.path.join(source, file_name)):
                raise ConfigError("Each search database directory must contain following "
                                  "files: 'kind.txt', 'reference.txt', 'genes.txt', and "
                                  "'genes.hmm.gz'. %s does not seem to be a proper "
                                  "source." % source_name)

        ref = read_first_line(os.path.join(source, 'reference.txt'))
        kind = read_first_line(os.path.join(source, 'kind.txt'))

        if not is_proper(kind):
            raise ConfigError("'kind.txt' defines the kind of search this database offers. "
                              "This file must contain a single word that is at least three "
                              "characters long, and must not contain any characters but "
                              "ASCII letters, digits, and underscore. Here are some nice "
                              "examples: 'singlecopy', or 'pathogenicity', or "
                              "'noras_selection'. But yours is '%s'." % kind)

        genes = get_TAB_delimited_file_as_dictionary(
            os.path.join(source, 'genes.txt'),
            column_names=['gene', 'accession', 'hmmsource'],
        )

        sources[source_name] = {
            'ref': ref,
            'kind': kind,
            # list() keeps this a real list on Python 3 as well, where
            # dict.keys() is a view.
            'genes': list(genes.keys()),
            'model': os.path.join(source, 'genes.hmm.gz'),
        }

    return sources