Esempio n. 1
0
def is_this_name_OK_for_database(variable_name, content, allowed_chars = allowed_chars.replace('.', '')):
    """Check that `content` is an acceptable name to store in a database.

    Args:
        variable_name: Label of the parameter being validated; it is only
            interpolated into the error messages.
        content: The candidate name.
        allowed_chars: Characters `content` may be made of. The default is the
            module-level `allowed_chars` with '.' removed, evaluated once when
            the function is defined.

    Raises:
        ConfigError: If `content` is empty, starts with a character found in
            the module-level `digits`, or contains a character that is not in
            `allowed_chars`.
    """
    # Guard first: indexing an empty (or None) name below would otherwise
    # surface as an IndexError/TypeError instead of a ConfigError.
    if not content:
        raise ConfigError("Sorry, '%s' can't be empty. Please specify a name that starts with an ASCII letter." % variable_name)

    if content[0] in digits:
        raise ConfigError("Sorry, '%s' can't start with a digit. Long story. Please specify a sample name\
                            that starts with an ASCII letter." % variable_name)

    # any() stops at the first offending character; no throwaway list needed.
    if any(c not in allowed_chars for c in content):
        raise ConfigError("Well, '%s' parameter contains characters that anvi'o does not like. Please\
                            limit the characters to ASCII letters, digits, the underscore and dash\
                            characters ('_', '-')." % variable_name)
Esempio n. 2
0
def is_this_name_OK_for_database(variable_name, content, allowed_chars = allowed_chars.replace('.', '')):
    """Validate `content` as a name that can be stored in a database.

    Args:
        variable_name: Name of the parameter under validation; used only in
            the error messages.
        content: The candidate name.
        allowed_chars: Set of characters `content` may consist of. Defaults to
            the module-level `allowed_chars` without '.', computed once at
            definition time.

    Raises:
        ConfigError: If `content` is empty, if its first character is in the
            module-level `digits`, or if any of its characters is missing
            from `allowed_chars`.
    """
    # Without this guard an empty (or None) name fails on `content[0]` with
    # an IndexError/TypeError rather than a ConfigError.
    if not content:
        raise ConfigError("Sorry, '%s' can't be empty. Please specify a name that starts with an ASCII letter." % variable_name)

    if content[0] in digits:
        raise ConfigError("Sorry, '%s' can't start with a digit. Long story. Please specify a sample name\
                            that starts with an ASCII letter." % variable_name)

    # Short-circuits on the first character that is not allowed.
    if any(c not in allowed_chars for c in content):
        raise ConfigError("Well, '%s' parameter contains characters that anvi'o does not like. Please\
                            limit the characters to ASCII letters, digits, the underscore and dash\
                            characters ('_', '-')." % variable_name)
Esempio n. 3
0
def get_HMM_sources_dictionary(source_dirs=[]):
    """Describe every HMM search database directory in `source_dirs`.

    Each directory must have a "proper" base name and contain the files
    'reference.txt', 'kind.txt', 'genes.txt' and 'genes.hmm.gz'. A word is
    proper when it is at least three characters long, does not start with an
    underscore or a digit, and only uses characters from the module-level
    `allowed_chars` minus '.' and '-'.

    Args:
        source_dirs: List of directory paths. A single trailing '/' on a path
            is ignored. The default list is never mutated.

    Returns:
        A dict keyed by directory base name. Each value is a dict with:
        'ref' (first line of reference.txt, stripped), 'kind' (first line of
        kind.txt, stripped), 'genes' (list of the keys returned by
        get_TAB_delimited_file_as_dictionary for genes.txt) and 'model'
        (path to genes.hmm.gz).

    Raises:
        ConfigError: If `source_dirs` is not a list, a directory name is not
            proper, a required file is missing, or the kind is not proper.
    """
    if not isinstance(source_dirs, list):
        raise ConfigError("source_dirs parameter must be a list (get_HMM_sources_dictionary).")

    sources = {}
    allowed_chars_for_proper_sources = allowed_chars.replace(".", "").replace("-", "")

    def is_proper(word):
        # The length test runs first so that word[0] is never evaluated on
        # an empty string.
        return (
            len(word) >= 3
            and word[0] not in "_0123456789"
            and all(c in allowed_chars_for_proper_sources for c in word)
        )

    for source in source_dirs:
        if source.endswith("/"):
            source = source[:-1]

        source_name = os.path.basename(source)

        if not is_proper(source_name):
            raise ConfigError("One of the search database directories ('%s') contains characters in its name\
                                anvio does not like. Directory names should be at least three characters long\
                                and must not contain any characters but ASCII letters, digits and\
                                underscore" % source_name)

        for f in ["reference.txt", "kind.txt", "genes.txt", "genes.hmm.gz"]:
            if not os.path.exists(os.path.join(source, f)):
                raise ConfigError("Each search database directory must contain following files:\
                                    'kind.txt', 'reference.txt', 'genes.txt', and 'genes.hmm.gz'. %s does not seem\
                                    to be a proper source." % source_name)

        # Context managers close the handles deterministically. readline()
        # yields '' for an empty file, so an empty kind.txt is reported through
        # the ConfigError below instead of an IndexError.
        with open(os.path.join(source, "reference.txt")) as ref_file:
            ref = ref_file.readline().strip()
        with open(os.path.join(source, "kind.txt")) as kind_file:
            kind = kind_file.readline().strip()

        if not is_proper(kind):
            raise ConfigError("'kind.txt' defines the kind of search this database offers. This file must contain a single\
                                word that is at least three characters long, and must not contain any characters but\
                                ASCII letters, digits, and underscore. Here are some nice examples: 'singlecopy',\
                                or 'pathogenicity', or 'noras_selection'. But yours is '%s'." % (kind))

        genes = get_TAB_delimited_file_as_dictionary(
            os.path.join(source, "genes.txt"), column_names=["gene", "accession", "hmmsource"]
        )

        sources[source_name] = {
            "ref": ref,
            "kind": kind,
            # list() keeps this a real list on Python 3, where keys() is a view.
            "genes": list(genes.keys()),
            "model": os.path.join(source, "genes.hmm.gz"),
        }

    return sources
Esempio n. 4
0
def check_sample_id(sample_id):
    """Validate a sample name; a falsy `sample_id` is accepted without checks.

    Args:
        sample_id: The sample name to validate, or None / '' to skip
            validation.

    Raises:
        ConfigError: If `sample_id` starts with a character found in the
            module-level `digits`, or contains a character outside the
            module-level `allowed_chars` minus '-' and '.'.
    """
    if sample_id:
        if sample_id[0] in digits:
            raise ConfigError("Sample names can't start with digits. Long story. Please specify a sample name\
                                that starts with an ASCII letter (you can use '-s' parameter for that).")

        allowed_chars_for_samples = allowed_chars.replace('-', '').replace('.', '')
        # any() stops at the first offending character; no throwaway list.
        if any(c not in allowed_chars_for_samples for c in sample_id):
            raise ConfigError("Sample name ('%s') contains characters that anvio does not like. Please\
                                limit the characters that make up the project name to ASCII letters,\
                                digits, and the underscore character ('_')." % sample_id)
Esempio n. 5
0
def check_sample_id(sample_id):
    """Validate a sample name; a falsy `sample_id` is accepted without checks.

    Args:
        sample_id: The sample name to validate, or None / '' to skip
            validation.

    Raises:
        ConfigError: If `sample_id` starts with a character found in the
            module-level `digits`, or contains a character outside the
            module-level `allowed_chars` minus '-' and '.'.
    """
    if sample_id:
        if sample_id[0] in digits:
            raise ConfigError("Sample names can't start with digits. Long story. Please specify a sample name\
                                that starts with an ASCII letter (you may want to check '-s' parameter to set\
                                a sample name if your client permits (otherwise you are going to have to edit\
                                your input files)).")

        allowed_chars_for_samples = allowed_chars.replace('-', '').replace('.', '')
        # any() stops at the first offending character; no throwaway list.
        if any(c not in allowed_chars_for_samples for c in sample_id):
            raise ConfigError("Sample name ('%s') contains characters that anvio does not like. Please\
                                limit the characters that make up the project name to ASCII letters,\
                                digits, and the underscore character ('_')." % sample_id)
Esempio n. 6
0
def check_sample_id(sample_id):
    """Ensure a sample name is usable; None or an empty name passes untouched.

    Args:
        sample_id: The sample name to check. Falsy values skip every check.

    Raises:
        ConfigError: If the first character of `sample_id` is in the
            module-level `digits`, or if any character is not part of the
            module-level `allowed_chars` once '-' and '.' are removed from it.
    """
    if sample_id:
        if sample_id[0] in digits:
            raise ConfigError("Sample names can't start with digits. Long story. Please specify a sample name\
                                that starts with an ASCII letter (you may want to check '-s' parameter to set\
                                a sample name if your client permits (otherwise you are going to have to edit\
                                your input files)).")

        allowed_chars_for_samples = allowed_chars.replace('-', '').replace('.', '')
        # Short-circuits on the first character that is not allowed.
        if any(c not in allowed_chars_for_samples for c in sample_id):
            raise ConfigError("Sample name ('%s') contains characters that anvio does not like. Please\
                                limit the characters that make up the project name to ASCII letters,\
                                digits, and the underscore character ('_')." % sample_id)
Esempio n. 7
0
def get_HMM_sources_dictionary(source_dirs=[]):
    """Collect metadata for each HMM search database directory.

    Every directory in `source_dirs` must have a "proper" base name and hold
    'reference.txt', 'kind.txt', 'genes.txt' and 'genes.hmm.gz'. A proper word
    has at least three characters, does not begin with an underscore or a
    digit, and is made only of characters from the module-level
    `allowed_chars` with '.' and '-' removed.

    Args:
        source_dirs: List of directory paths; one trailing '/' per path is
            dropped. The default list is never mutated.

    Returns:
        A dict mapping each directory base name to a dict with the keys
        'ref' (stripped first line of reference.txt), 'kind' (stripped first
        line of kind.txt), 'genes' (list of the keys returned by
        get_TAB_delimited_file_as_dictionary for genes.txt) and 'model'
        (path to genes.hmm.gz).

    Raises:
        ConfigError: If `source_dirs` is not a list, a directory base name is
            not proper, a required file is missing, or the kind read from
            kind.txt is not proper.
    """
    if not isinstance(source_dirs, list):
        raise ConfigError("source_dirs parameter must be a list (get_HMM_sources_dictionary).")

    sources = {}
    allowed_chars_for_proper_sources = allowed_chars.replace('.', '').replace(
        '-', '')

    def is_proper(word):
        # Length is tested before word[0] so an empty string never gets indexed.
        return (len(word) >= 3
                and word[0] not in '_0123456789'
                and all(c in allowed_chars_for_proper_sources for c in word))

    for source in source_dirs:
        if source.endswith('/'):
            source = source[:-1]

        source_name = os.path.basename(source)

        if not is_proper(source_name):
            raise ConfigError("One of the search database directories ('%s') contains characters in its name\
                                anvio does not like. Directory names should be at least three characters long\
                                and must not contain any characters but ASCII letters, digits and\
                                underscore" % source_name)

        for f in ['reference.txt', 'kind.txt', 'genes.txt', 'genes.hmm.gz']:
            if not os.path.exists(os.path.join(source, f)):
                raise ConfigError("Each search database directory must contain following files:\
                                    'kind.txt', 'reference.txt', 'genes.txt', and 'genes.hmm.gz'. %s does not seem\
                                    to be a proper source." % source_name)

        # `with` closes both handles promptly. readline() returns '' on an
        # empty file, so an empty kind.txt ends up in the ConfigError below
        # rather than raising IndexError.
        with open(os.path.join(source, 'reference.txt')) as ref_file:
            ref = ref_file.readline().strip()
        with open(os.path.join(source, 'kind.txt')) as kind_file:
            kind = kind_file.readline().strip()

        if not is_proper(kind):
            raise ConfigError("'kind.txt' defines the kind of search this database offers. This file must contain a single\
                                word that is at least three characters long, and must not contain any characters but\
                                ASCII letters, digits, and underscore. Here are some nice examples: 'singlecopy',\
                                or 'pathogenicity', or 'noras_selection'. But yours is '%s'." % (kind))

        genes = get_TAB_delimited_file_as_dictionary(
            os.path.join(source, 'genes.txt'),
            column_names=['gene', 'accession', 'hmmsource'])

        sources[source_name] = {
            'ref': ref,
            'kind': kind,
            # list() guarantees a real list even where keys() returns a view.
            'genes': list(genes.keys()),
            'model': os.path.join(source, 'genes.hmm.gz')
        }

    return sources