Example #1
0
    def sanity_check(self):
        """Validate the input files and stage copies of them for the output.

        The view data file is checked for being TAB-delimited and, when a tree file
        is set, it is handed to `filesnpaths.is_proper_newick`. After the output
        directory check, every input file that is set is copied to the path returned
        by `self.get_output_file_path` for its fixed file name, and the matching path
        attribute is re-pointed at the copy. `self.sanity_checked` is set to True at
        the end.
        """
        filesnpaths.is_file_tab_delimited(self.view_data_path)
        if self.tree_file_path:
            filesnpaths.is_proper_newick(self.tree_file_path)

        self.check_output_directory()

        # the view data is mandatory, so it is always staged
        staged_view_data_path = self.get_output_file_path('view_data.txt')
        shutil.copyfile(self.view_data_path, staged_view_data_path)
        self.view_data_path = staged_view_data_path

        # the remaining inputs are optional: (attribute holding the path, staged file name)
        optional_inputs = (
            ('tree_file_path', 'tree.txt'),
            ('additional_view_data_file_path', 'additional_view_data.txt'),
            ('samples_info_file_path', 'anvio_samples_info.txt'),
        )

        for attribute_name, staged_file_name in optional_inputs:
            source_path = getattr(self, attribute_name)
            if not source_path:
                continue

            staged_path = self.get_output_file_path(staged_file_name)
            shutil.copyfile(source_path, staged_path)
            setattr(self, attribute_name, staged_path)

        self.sanity_checked = True
Example #2
0
    def sanity_check(self):
        """Fill in clustering defaults, validate inputs, and stage copies of them.

        Missing distance / linkage settings fall back to the defaults in `constants`,
        and the pair is passed to `clustering.is_distance_and_linkage_compatible`.
        The view data file is checked for being TAB-delimited and, when a tree file
        is set, it is handed to `filesnpaths.is_proper_newick`. After the output
        directory check, every input file that is set is copied to the path returned
        by `self.get_output_file_path` for its fixed file name, and the matching path
        attribute is re-pointed at the copy. `self.sanity_checked` is set to True at
        the end.
        """
        if not self.distance:
            self.distance = constants.distance_metric_default
        if not self.linkage:
            self.linkage = constants.linkage_method_default

        clustering.is_distance_and_linkage_compatible(self.distance, self.linkage)

        filesnpaths.is_file_tab_delimited(self.view_data_path)
        if self.tree_file_path:
            filesnpaths.is_proper_newick(self.tree_file_path)

        self.check_output_directory()

        # the view data is mandatory, so it is always staged
        staged_view_data_path = self.get_output_file_path('view_data.txt')
        shutil.copyfile(self.view_data_path, staged_view_data_path)
        self.view_data_path = staged_view_data_path

        # the remaining inputs are optional: (attribute holding the path, staged file name)
        optional_inputs = (
            ('tree_file_path', 'tree.txt'),
            ('additional_view_data_file_path', 'additional_view_data.txt'),
            ('samples_info_file_path', 'anvio_samples_info.txt'),
        )

        for attribute_name, staged_file_name in optional_inputs:
            source_path = getattr(self, attribute_name)
            if not source_path:
                continue

            staged_path = self.get_output_file_path(staged_file_name)
            shutil.copyfile(source_path, staged_path)
            setattr(self, attribute_name, staged_path)

        self.sanity_checked = True
Example #3
0
    def sanity_check(self):
        """Fill in clustering defaults, validate inputs, and stage copies of them.

        Missing distance / linkage settings fall back to the defaults in `constants`,
        and the pair is passed to `clustering.is_distance_and_linkage_compatible`.
        The view data file is checked for being TAB-delimited and, when a tree file
        is set, it is handed to `filesnpaths.is_proper_newick`. After the output
        directory check, every input file that is set is copied to the path returned
        by `self.get_output_file_path` for its fixed file name, and the matching path
        attribute is re-pointed at the copy. `self.sanity_checked` is set to True at
        the end.
        """
        if not self.distance:
            self.distance = constants.distance_metric_default
        if not self.linkage:
            self.linkage = constants.linkage_method_default

        clustering.is_distance_and_linkage_compatible(self.distance, self.linkage)

        filesnpaths.is_file_tab_delimited(self.view_data_path)
        if self.tree_file_path:
            filesnpaths.is_proper_newick(self.tree_file_path)

        self.check_output_directory()

        # the view data is mandatory, so it is always staged
        staged_view_data_path = self.get_output_file_path('view_data.txt')
        shutil.copyfile(self.view_data_path, staged_view_data_path)
        self.view_data_path = staged_view_data_path

        # the remaining inputs are optional: (attribute holding the path, staged file name)
        optional_inputs = (
            ('tree_file_path', 'tree.txt'),
            ('additional_view_data_file_path', 'additional_view_data.txt'),
            ('samples_info_file_path', 'anvio_samples_info.txt'),
        )

        for attribute_name, staged_file_name in optional_inputs:
            source_path = getattr(self, attribute_name)
            if not source_path:
                continue

            staged_path = self.get_output_file_path(staged_file_name)
            shutil.copyfile(source_path, staged_path)
            setattr(self, attribute_name, staged_path)

        self.sanity_checked = True
Example #4
0
    def run_command(self, input_file_path, output_file_path):
        """Run `self.command` on the input file and store the resulting newick tree.

        The raw bytes of `input_file_path` are sent to the process through its
        standard input. What the process writes to standard error is reported through
        `run`: the first line as the version, paragraphs starting with "WARNING! " as
        warnings, and the remaining lines as "key: value" info entries. Lines that
        start with six spaces or that contain 'seconds' are not reported.

        When `filesnpaths.is_proper_newick` returns a true value for the standard
        output of the process, that output is written to `output_file_path`.

        Parameters
        ----------
        input_file_path : path of the file whose contents are piped to the process
        output_file_path : path the newick output is written to
        """
        # read the input up front so the handle is closed even if the process fails to start
        with open(input_file_path, 'rb') as input_file:
            input_data = input_file.read()

        fasttree = Popen(self.command, stdout=PIPE, stdin=PIPE, stderr=PIPE)
        stdout_data, stderr_data = fasttree.communicate(input=input_data)

        output_stdout = stdout_data.decode().rstrip()
        output_stderr = stderr_data.decode().splitlines()

        # nothing on stderr means there is no version line to report
        if output_stderr:
            run.info("Version", output_stderr[0])

        warning_lines = []
        for line in output_stderr[1:]:
            if warning_lines or line.startswith("WARNING! "):
                # a warning paragraph runs until the next empty line
                warning_lines.append(line)
                if line == "":
                    run.warning("\n".join(warning_lines) + "\n")
                    warning_lines = []
            elif line.startswith("      ") or 'seconds' in line:
                continue
            else:
                fields = line.split(":")
                if len(fields) == 2:
                    run.info(fields[0], fields[1].strip())
                else:
                    run.info("Info", line)

        # a warning that is not followed by an empty line would otherwise be lost
        if warning_lines:
            run.warning("\n".join(warning_lines) + "\n")

        if filesnpaths.is_proper_newick(output_stdout):
            with open(output_file_path, 'w') as output_file:
                output_file.write(output_stdout + '\n')

            run.info('FastTree output newick file', output_file_path, mc='green', nl_before=1, nl_after=1)
Example #5
0
    def process_single_order_data(self, single_order_path, single_order_name):
        """Just inject a single order into the `self.samples_order_dict`

        The file at `single_order_path` must hold exactly one line: either a newick
        tree or a comma-separated list of sample names. The order is stored under
        `single_order_name` as a 'newick' or a 'basic' entry, depending on whether
        `filesnpaths.is_proper_newick` accepts it, and the name is added to
        `self.available_orders`.

        Nothing happens when `single_order_path` is empty. `SamplesError` is raised
        when a path is given without a name, or when the file does not contain
        exactly one line. The `filesnpaths` checks may raise their own errors.
        """

        if not single_order_path:
            return

        if not single_order_name:
            raise SamplesError("You provided a file for a single order, but not a name for it. This is a no no :/")

        filesnpaths.is_file_plain_text(single_order_path)

        # plain text mode already translates every newline flavor, and the 'U' flag is
        # rejected by Python 3.11 and later
        with open(single_order_path, 'r') as single_order_file:
            single_order_file_content = [l.strip('\n') for l in single_order_file]

        if len(single_order_file_content) != 1:
            raise SamplesError("The single order file should contain a single line of information. It can't have nothing,\
                                it can't have too much. Just a single newick tree, or a comma-separated list of sample\
                                names.")

        _order = single_order_file_content.pop()

        if filesnpaths.is_proper_newick(_order, dont_raise=True):
            order_entry = {'newick': _order, 'basic': None}
            order_row = [single_order_name, '', _order]
        else:
            order_entry = {'basic': _order, 'newick': None}
            order_row = [single_order_name, _order, '']

        # if you are reading this line, please brace yourself to possibly one of the silliest
        # bunch of lines in the anvi'o codebase. the reason we are doing this this way is quite
        # a long story, and deserves a FIXME, but in order to utilize the excellent function
        # in the filesnpaths module to check the contents of the samples order dict rigorously,
        # we need to have this information in a file. a better way could have been implementing
        # a filesnpaths.is_proper_samples_order_content function next to the currently available
        # filesnpaths.is_proper_samples_order_file (the latter would call the former with a dict
        # and it would be much more flexible), but we can't import utils from within filesnpaths.
        # without utils we don't have a get_TAB_delimited_file_as_dictionary function, and we are
        # definitely not going to implement it in two places :( recovering from a poor design by
        # doing something even poorer? couldn't have we fixed this once and for all instead of
        # writing this paragraph? well. just remember that you are thinking about a rhetorical
        # question in a comment section. so sometimes we do things that are not quite productive.
        temp_samples_order_file_path = filesnpaths.get_temp_file_path()
        try:
            with open(temp_samples_order_file_path, 'w') as temp_samples_order_file:
                temp_samples_order_file.write('\t'.join(['attributes', 'basic', 'newick']) + '\n')
                temp_samples_order_file.write('\t'.join(order_row) + '\n')

            sample_names_in_samples_order_file = filesnpaths.is_proper_samples_order_file(temp_samples_order_file_path)
        finally:
            # do not leave the temporary file behind when the validation raises
            if os.path.exists(temp_samples_order_file_path):
                os.remove(temp_samples_order_file_path)

        # only register the order once it passed the validation above
        self.samples_order_dict[single_order_name] = order_entry

        if not self.sample_names_in_samples_information_file:
            self.sample_names_in_samples_order_file = sample_names_in_samples_order_file

        self.available_orders.add(single_order_name)

        self.run.info('Samples order', "A single order for '%s' is also loaded" % single_order_name, quiet=self.quiet)
Example #6
0
    def process_single_order_data(self, single_order_path, single_order_name):
        """Just inject a single order into the `self.samples_order_dict`

        The file at `single_order_path` must hold exactly one line: either a newick
        tree or a comma-separated list of sample names. The order is stored under
        `single_order_name` as a 'newick' or a 'basic' entry, depending on whether
        `filesnpaths.is_proper_newick` accepts it, and the name is added to
        `self.available_orders`.

        Nothing happens when `single_order_path` is empty. `SamplesError` is raised
        when a path is given without a name, or when the file does not contain
        exactly one line. The `filesnpaths` checks may raise their own errors.
        """

        if not single_order_path:
            return

        if not single_order_name:
            raise SamplesError("You provided a file for a single order, but not a name for it. This is a no no :/")

        filesnpaths.is_file_plain_text(single_order_path)

        # plain text mode already translates every newline flavor, and the 'U' flag is
        # rejected by Python 3.11 and later
        with open(single_order_path, 'r') as single_order_file:
            single_order_file_content = [l.strip('\n') for l in single_order_file]

        if len(single_order_file_content) != 1:
            raise SamplesError("The single order file should contain a single line of information. It can't have nothing,\
                                it can't have too much. Just a single newick tree, or a comma-separated list of sample\
                                names.")

        _order = single_order_file_content.pop()

        if filesnpaths.is_proper_newick(_order, dont_raise=True):
            order_entry = {'newick': _order, 'basic': None}
            order_row = [single_order_name, '', _order]
        else:
            order_entry = {'basic': _order, 'newick': None}
            order_row = [single_order_name, _order, '']

        # if you are reading this line, please brace yourself to possibly one of the silliest
        # bunch of lines in the anvi'o codebase. the reason we are doing this this way is quite
        # a long story, and deserves a FIXME, but in order to utilize the excellent function
        # in the filesnpaths module to check the contents of the samples order dict rigorously,
        # we need to have this information in a file. a better way could have been implementing
        # a filesnpaths.is_proper_samples_order_content function next to the currently available
        # filesnpaths.is_proper_samples_order_file (the latter would call the former with a dict
        # and it would be much more flexible), but we can't import utils from within filesnpaths.
        # without utils we don't have a get_TAB_delimited_file_as_dictionary function, and we are
        # definitely not going to implement it in two places :( recovering from a poor design by
        # doing something even poorer? couldn't have we fixed this once and for all instead of
        # writing this paragraph? well. just remember that you are thinking about a rhetorical
        # question in a comment section. so sometimes we do things that are not quite productive.
        temp_samples_order_file_path = filesnpaths.get_temp_file_path()
        try:
            with open(temp_samples_order_file_path, 'w') as temp_samples_order_file:
                temp_samples_order_file.write('\t'.join(['attributes', 'basic', 'newick']) + '\n')
                temp_samples_order_file.write('\t'.join(order_row) + '\n')

            sample_names_in_samples_order_file = filesnpaths.is_proper_samples_order_file(temp_samples_order_file_path)
        finally:
            # do not leave the temporary file behind when the validation raises
            if os.path.exists(temp_samples_order_file_path):
                os.remove(temp_samples_order_file_path)

        # only register the order once it passed the validation above
        self.samples_order_dict[single_order_name] = order_entry

        if not self.sample_names_in_samples_information_file:
            self.sample_names_in_samples_order_file = sample_names_in_samples_order_file

        self.available_orders.add(single_order_name)

        self.run.info('Samples order', "A single order for '%s' is also loaded" % single_order_name, quiet=self.quiet)
Example #7
0
    def sanity_check(self):
        """Validate the input files and stage copies of them for the output.

        The view data file is checked for being TAB-delimited and, when a tree file
        is set, it is handed to `filesnpaths.is_proper_newick`. After the output
        directory check, every input file that is set is copied to the path returned
        by `self.get_output_file_path` for its fixed file name, and the matching path
        attribute is re-pointed at the copy. `self.sanity_checked` is set to True at
        the end.
        """
        filesnpaths.is_file_tab_delimited(self.view_data_path)
        if self.tree_file_path:
            filesnpaths.is_proper_newick(self.tree_file_path)

        self.check_output_directory()

        # the view data is mandatory, so it is always staged
        staged_view_data_path = self.get_output_file_path('view_data.txt')
        shutil.copyfile(self.view_data_path, staged_view_data_path)
        self.view_data_path = staged_view_data_path

        # the remaining inputs are optional: (attribute holding the path, staged file name)
        optional_inputs = (
            ('tree_file_path', 'tree.txt'),
            ('additional_view_data_file_path', 'additional_view_data.txt'),
        )

        for attribute_name, staged_file_name in optional_inputs:
            source_path = getattr(self, attribute_name)
            if not source_path:
                continue

            staged_path = self.get_output_file_path(staged_file_name)
            shutil.copyfile(source_path, staged_path)
            setattr(self, attribute_name, staged_path)

        self.sanity_checked = True
Example #8
0
    def load_from_user_files(self, args):
        """Set up the interactive interface from user-provided files (manual mode).

        Checks the combination of parameters, reads the newick tree and the
        TAB-delimited view data, optionally reads a FASTA file to fill in
        `self.split_sequences` and `self.splits_basic_info`, creates an empty profile
        database when `self.profile_db_path` does not exist yet, and sets up the
        states table and the collections from that database.

        `args` is not used in the body of this method.

        Raises `ConfigError` when the parameters are inconsistent, when the first
        column header of the view data is not 'contig', or when names in the view
        data are missing from the FASTA file. The `filesnpaths` checks may raise
        their own errors.
        """
        if self.contigs_db_path:
            raise ConfigError("When you want to use the interactive interface in an ad hoc manner, you must\
                                not use a contigs database.")

        if not self.profile_db_path:
            raise ConfigError("Even when you want to use the interactive interface in an ad hoc manner by\
                                using the '--manual-mode' flag, you still need to declare a profile database.\
                                The profile database in this mode only used to read or store the 'state' of\
                                the display for visualization purposes. You DO NOT need to point to an already\
                                existing database, as anvi'o will generate an empty one for your if there is no\
                                profile database.")

        if (not self.view_data_path) or (not self.tree):
            raise ConfigError("When you are running the interactive interface in manual mode, you must declare\
                                each of the '-d', and '-t' parameters. Please see the documentation for help.")

        if self.view:
            raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                                in manual mode")

        if self.show_views:
            raise ConfigError("Sorry, there are no views to show in manual mode :/")

        if self.show_states:
            raise ConfigError("Sorry, there are no states to show in manual mode :/")

        filesnpaths.is_file_exists(self.tree)
        filesnpaths.is_proper_newick(self.tree)

        # read the tree through a context manager so the file handle is closed right away
        with open(os.path.abspath(self.tree)) as tree_file:
            newick = tree_file.read()

        view_data_path = os.path.abspath(self.view_data_path)
        self.p_meta['splits_fasta'] = os.path.abspath(self.fasta_file) if self.fasta_file else None
        self.p_meta['output_dir'] = None
        self.p_meta['views'] = {}
        self.p_meta['merged'] = True
        self.p_meta['default_view'] = 'single'
        self.p_meta['default_clustering'] = 'default'
        self.p_meta['available_clusterings'] = ['default']
        self.p_meta['clusterings'] = {'default': {'newick': newick}}

        self.default_view = self.p_meta['default_view']

        # sanity of the view data
        filesnpaths.is_file_tab_delimited(view_data_path)
        view_data_columns = utils.get_columns_of_TAB_delim_file(view_data_path, include_first_column=True)
        if not view_data_columns[0] == "contig":
            raise ConfigError("The first row of the first column of the view data file must\
                                      say 'contig', which is not the case for your view data file\
                                      ('%s'). Please make sure this is a properly formatted view data\
                                      file." % (view_data_path))

        # load view data as the default view:
        self.views[self.default_view] = {'header': view_data_columns[1:],
                                         'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)}

        # keep a real list rather than a live dictionary view
        self.split_names_ordered = list(self.views[self.default_view]['dict'].keys())

        # we assume that the sample names are the header of the view data, so we might as well set it up:
        self.p_meta['samples'] = self.views[self.default_view]['header']

        # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
        # otherwise we will leave them empty
        self.splits_basic_info = {}
        self.split_sequences = None
        if self.p_meta['splits_fasta']:
            filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
            self.split_sequences = utils.get_FASTA_file_as_dictionary(self.p_meta['splits_fasta'])

            names_missing_in_FASTA = set(self.split_names_ordered) - set(self.split_sequences.keys())
            num_names_missing_in_FASTA = len(names_missing_in_FASTA)
            if num_names_missing_in_FASTA:
                raise ConfigError('Some of the names in your view data does not have corresponding entries in the\
                                    FASTA file you provided. Here is an example to one of those %d names that occur\
                                    in your data file, but not in the FASTA file: "%s"' % (num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

            # setup a mock splits_basic_info dict
            for split_id in self.split_names_ordered:
                self.splits_basic_info[split_id] = {'length': len(self.split_sequences[split_id]),
                                                    'gc_content': utils.get_GC_content_for_sequence(self.split_sequences[split_id])}

        # create a new, empty profile database for ad hoc operations
        if not os.path.exists(self.profile_db_path):
            profile_db = ProfileDatabase(self.profile_db_path)
            profile_db.create({'db_type': 'profile', 'merged': True, 'contigs_db_hash': None, 'samples': ','.join(self.p_meta['samples'])})

        # create an instance of states table
        self.states_table = TablesForStates(self.profile_db_path, anvio.__profile__version__)

        # also populate collections, if there are any
        self.collections.populate_sources_dict(self.profile_db_path, anvio.__profile__version__)

        # NOTE(review): this self-assignment has no visible effect here; kept as-is in case
        # `title` is a property with a setter -- confirm and remove
        if self.title:
            self.title = self.title
Example #9
0
    def load_manual_mode(self, args):
        """Set up the interactive interface in manual mode from user-provided files.

        Checks the combination of parameters, reads the newick tree, and loads the
        TAB-delimited view data when `self.view_data_path` is set. Without view data,
        a mock view is built from the leaf names of the tree. A FASTA file, when
        given, fills in `self.split_sequences` and `self.splits_basic_info`. An empty
        profile database is created when `self.profile_db_path` does not exist yet,
        and the states table and the collections are set up from that database.

        `args` is not used in the body of this method.

        Raises `ConfigError` when the parameters are inconsistent, when an existing
        profile database has a contigs database hash, when the first column header of
        the view data is not 'contig', or when names in the view data are missing
        from the FASTA file. The `filesnpaths` checks may raise their own errors.
        """
        if self.contigs_db_path:
            raise ConfigError("When you want to use the interactive interface in manual mode, you must\
                                not use a contigs database.")

        # this check must come before any use of the path: os.path.exists() fails with a
        # TypeError on None, which would hide the explanation below from the user
        if not self.profile_db_path:
            raise ConfigError("Even when you want to use the interactive interface in manual mode, you need\
                                to declare a profile database. The profile database in this mode only used to\
                                read or store the 'state' of the display for visualization purposes. You DO\
                                NOT need to point to an already existing database, as anvi'o will generate\
                                an empty one for your if there is no profile database.")

        # if the user is using an existing profile database, we need to make sure that it is not associated
        # with a contigs database, since it would mean that it is a full anvi'o profile database and should
        # not be included in manual operations.
        if os.path.exists(self.profile_db_path):
            profile_db = ProfileDatabase(self.profile_db_path)
            if profile_db.meta['contigs_db_hash']:
                raise ConfigError("Well. It seems the profile database is associated with a contigs database,\
                                    which means using it in manual mode is not the best way to use it. Probably\
                                    what you wanted to do is to let the manual mode create a new profile database\
                                    for you. Simply type in a new profile database path (it can be a file name\
                                    that doesn't exist).")

        if not self.tree:
            raise ConfigError("When you are running the interactive interface in manual mode, you must declare\
                                at least the tree file. Please see the documentation for help.")

        if self.view:
            raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                                in manual mode")

        if self.show_views:
            raise ConfigError("Sorry, there are no views to show in manual mode :/")

        if self.show_states:
            raise ConfigError("Sorry, there are no states to show in manual mode :/")

        filesnpaths.is_file_exists(self.tree)
        tree = filesnpaths.is_proper_newick(self.tree)

        # read the tree through a context manager so the file handle is closed right away;
        # the lines are stripped and joined into a single-line newick string
        with open(os.path.abspath(self.tree)) as tree_file:
            newick = ''.join([l.strip() for l in tree_file])

        view_data_path = os.path.abspath(
            self.view_data_path) if self.view_data_path else None
        self.p_meta['splits_fasta'] = os.path.abspath(
            self.fasta_file) if self.fasta_file else None
        self.p_meta['output_dir'] = None
        self.p_meta['views'] = {}
        self.p_meta['merged'] = True
        self.p_meta['default_view'] = 'single'

        clustering_id = '%s:unknown:unknown' % filesnpaths.get_name_from_file_path(
            self.tree)
        self.p_meta['default_clustering'] = clustering_id
        self.p_meta['available_clusterings'] = [clustering_id]
        self.p_meta['clusterings'] = {clustering_id: {'newick': newick}}

        self.default_view = self.p_meta['default_view']

        if self.view_data_path:
            # sanity of the view data
            filesnpaths.is_file_tab_delimited(view_data_path)
            view_data_columns = utils.get_columns_of_TAB_delim_file(
                view_data_path, include_first_column=True)
            if not view_data_columns[0] == "contig":
                raise ConfigError("The first row of the first column of the view data file must\
                                    say 'contig', which is not the case for your view data file\
                                    ('%s'). Please make sure this is a properly formatted view data\
                                    file." % (view_data_path))

            # load view data as the default view:
            self.views[self.default_view] = {
                'header': view_data_columns[1:],
                'dict':
                utils.get_TAB_delimited_file_as_dictionary(view_data_path)
            }
        else:
            # no view data is provided... it is only the tree we have. we will create a mock 'view data dict'
            # here using what is in the tree.
            names_in_the_tree = [n.name for n in tree.get_leaves()]

            ad_hoc_dict = {}
            for item in names_in_the_tree:
                ad_hoc_dict[item] = {'names': item}

            self.views[self.default_view] = {
                'header': ['names'],
                'dict': ad_hoc_dict
            }

        # keep a real list rather than a live dictionary view
        self.split_names_ordered = list(self.views[self.default_view]['dict'].keys())

        # we assume that the sample names are the header of the view data, so we might as well set it up:
        self.p_meta['samples'] = self.views[self.default_view]['header']

        # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
        # otherwise we will leave them empty
        self.splits_basic_info = {}
        self.split_sequences = None
        if self.p_meta['splits_fasta']:
            filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
            self.split_sequences = utils.get_FASTA_file_as_dictionary(
                self.p_meta['splits_fasta'])

            names_missing_in_FASTA = set(self.split_names_ordered) - set(
                self.split_sequences.keys())
            num_names_missing_in_FASTA = len(names_missing_in_FASTA)
            if num_names_missing_in_FASTA:
                raise ConfigError('Some of the names in your view data does not have corresponding entries in the\
                                    FASTA file you provided. Here is an example to one of those %d names that occur\
                                    in your data file, but not in the FASTA file: "%s"' % (
                    num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

            # setup a mock splits_basic_info dict
            for split_id in self.split_names_ordered:
                self.splits_basic_info[split_id] = {
                    'length':
                    len(self.split_sequences[split_id]),
                    'gc_content':
                    utils.get_GC_content_for_sequence(
                        self.split_sequences[split_id])
                }

        # create a new, empty profile database for manual operations
        if not os.path.exists(self.profile_db_path):
            profile_db = ProfileDatabase(self.profile_db_path)
            profile_db.create({
                'db_type': 'profile',
                'merged': True,
                'contigs_db_hash': None,
                'samples': ','.join(self.p_meta['samples'])
            })

        # create an instance of states table
        self.states_table = TablesForStates(self.profile_db_path,
                                            anvio.__profile__version__)

        # also populate collections, if there are any
        self.collections.populate_collections_dict(self.profile_db_path,
                                                   anvio.__profile__version__)

        # NOTE(review): this self-assignment has no visible effect here; kept as-is in case
        # `title` is a property with a setter -- confirm and remove
        if self.title:
            self.title = self.title
Example #10
0
    def load_from_user_files(self, args):
        """Set up the interactive interface from user-provided files (manual mode).

        Checks the combination of parameters, reads the newick tree and the
        TAB-delimited view data, optionally reads a FASTA file to fill in
        `self.split_sequences` and `self.splits_basic_info`, creates an empty profile
        database when `self.profile_db_path` does not exist yet, and sets up the
        states table and the collections from that database.

        `args` is not used in the body of this method.

        Raises `ConfigError` when the parameters are inconsistent, when the first
        column header of the view data is not 'contig', or when names in the view
        data are missing from the FASTA file. The `filesnpaths` checks may raise
        their own errors.
        """
        if self.contigs_db_path:
            raise ConfigError("When you want to use the interactive interface in an ad hoc manner, you must\
                                not use a contigs database.")

        if not self.profile_db_path:
            raise ConfigError("Even when you want to use the interactive interface in an ad hoc manner by\
                                using the '--manual-mode' flag, you still need to declare a profile database.\
                                The profile database in this mode only used to read or store the 'state' of\
                                the display for visualization purposes. You DO NOT need to point to an already\
                                existing database, as anvi'o will generate an empty one for your if there is no\
                                profile database.")

        if (not self.view_data_path) or (not self.tree):
            raise ConfigError("When you are running the interactive interface in manual mode, you must declare\
                                each of the '-d', and '-t' parameters. Please see the documentation for help.")

        if self.view:
            raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                                in manual mode")

        if self.show_views:
            raise ConfigError("Sorry, there are no views to show in manual mode :/")

        if self.show_states:
            raise ConfigError("Sorry, there are no states to show in manual mode :/")

        filesnpaths.is_file_exists(self.tree)
        filesnpaths.is_proper_newick(self.tree)

        # read the tree through a context manager so the file handle is closed right away
        with open(os.path.abspath(self.tree)) as tree_file:
            newick = tree_file.read()

        view_data_path = os.path.abspath(self.view_data_path)
        self.p_meta['splits_fasta'] = os.path.abspath(
            self.fasta_file) if self.fasta_file else None
        self.p_meta['output_dir'] = None
        self.p_meta['views'] = {}
        self.p_meta['merged'] = True
        self.p_meta['default_view'] = 'single'
        self.p_meta['default_clustering'] = 'default'
        self.p_meta['available_clusterings'] = ['default']
        self.p_meta['clusterings'] = {
            'default': {
                'newick': newick
            }
        }

        self.default_view = self.p_meta['default_view']

        # sanity of the view data
        filesnpaths.is_file_tab_delimited(view_data_path)
        view_data_columns = utils.get_columns_of_TAB_delim_file(
            view_data_path, include_first_column=True)
        if not view_data_columns[0] == "contig":
            raise ConfigError("The first row of the first column of the view data file must\
                                      say 'contig', which is not the case for your view data file\
                                      ('%s'). Please make sure this is a properly formatted view data\
                                      file." % (view_data_path))

        # load view data as the default view:
        self.views[self.default_view] = {
            'header': view_data_columns[1:],
            'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)
        }

        # keep a real list rather than a live dictionary view
        self.split_names_ordered = list(self.views[self.default_view]['dict'].keys())

        # we assume that the sample names are the header of the view data, so we might as well set it up:
        self.p_meta['samples'] = self.views[self.default_view]['header']

        # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
        # otherwise we will leave them empty
        self.splits_basic_info = {}
        self.split_sequences = None
        if self.p_meta['splits_fasta']:
            filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
            self.split_sequences = utils.get_FASTA_file_as_dictionary(
                self.p_meta['splits_fasta'])

            names_missing_in_FASTA = set(self.split_names_ordered) - set(
                self.split_sequences.keys())
            num_names_missing_in_FASTA = len(names_missing_in_FASTA)
            if num_names_missing_in_FASTA:
                raise ConfigError('Some of the names in your view data does not have corresponding entries in the\
                                    FASTA file you provided. Here is an example to one of those %d names that occur\
                                    in your data file, but not in the FASTA file: "%s"' % (
                    num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

            # setup a mock splits_basic_info dict
            for split_id in self.split_names_ordered:
                self.splits_basic_info[split_id] = {
                    'length':
                    len(self.split_sequences[split_id]),
                    'gc_content':
                    utils.get_GC_content_for_sequence(
                        self.split_sequences[split_id])
                }

        # create a new, empty profile database for ad hoc operations
        if not os.path.exists(self.profile_db_path):
            profile_db = ProfileDatabase(self.profile_db_path)
            profile_db.create({
                'db_type': 'profile',
                'merged': True,
                'contigs_db_hash': None,
                'samples': ','.join(self.p_meta['samples'])
            })

        # create an instance of states table
        self.states_table = TablesForStates(self.profile_db_path,
                                            anvio.__profile__version__)

        # also populate collections, if there are any
        self.collections.populate_sources_dict(self.profile_db_path,
                                               anvio.__profile__version__)

        # NOTE(review): this self-assignment has no visible effect here; kept as-is in case
        # `title` is a property with a setter -- confirm and remove
        if self.title:
            self.title = self.title
Example #11
0
    def load_manual_mode(self, args):
        """Prepare the interactive interface to run in 'manual mode'.

        Manual mode works without a contigs database. The display is built from a
        newick tree (`self.tree`, mandatory), an optional TAB-delimited view data file
        (`self.view_data_path`), and an optional FASTA file (`self.fasta_file`).

        The method fills in `self.p_meta`, `self.default_view`, `self.views`,
        `self.split_names_ordered`, `self.split_sequences`, `self.splits_basic_info`
        and `self.states_table`, creates an empty profile database at
        `self.profile_db_path` if that path does not exist yet, and asks
        `self.collections` to populate itself from that database.

        `args` is accepted for interface compatibility; it is not used here.

        Raises ConfigError when:
            - a contigs database was given,
            - no profile database path was given,
            - the profile database already exists and is linked to a contigs database,
            - no tree was given,
            - `--view`, `show_views` or `show_states` were requested,
            - the view data file does not start with a 'contig' column,
            - the view data contains names that are missing from the FASTA file.
        """
        if self.contigs_db_path:
            raise ConfigError("When you want to use the interactive interface in manual mode, you must\
                                not use a contigs database.")

        # this check has to come before any os.path.exists() call on the path: asking the
        # file system about a missing (None) path would fail with a TypeError and the user
        # would never get to see the explanation below.
        if not self.profile_db_path:
            raise ConfigError("Even when you want to use the interactive interface in manual mode, you need\
                                to declare a profile database. The profile database in this mode only used to\
                                read or store the 'state' of the display for visualization purposes. You DO\
                                NOT need to point to an already existing database, as anvi'o will generate\
                                an empty one for your if there is no profile database.")

        # if the user is using an existing profile database, we need to make sure that it is not associated
        # with a contigs database, since it would mean that it is a full anvi'o profile database and should
        # not be included in manual operations.
        if os.path.exists(self.profile_db_path):
            profile_db = ProfileDatabase(self.profile_db_path)
            if profile_db.meta['contigs_db_hash']:
                raise ConfigError("Well. It seems the profile database is associated with a contigs database,\
                                    which means using it in manual mode is not the best way to use it. Probably\
                                    what you wanted to do is to let the manual mode create a new profile database\
                                    for you. Simply type in a new profile database path (it can be a file name\
                                    that doesn't exist).")

        if not self.tree:
            raise ConfigError("When you are running the interactive interface in manual mode, you must declare\
                                at least the tree file. Please see the documentation for help.")

        if self.view:
            raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                                in manual mode")

        if self.show_views:
            raise ConfigError("Sorry, there are no views to show in manual mode :/")

        if self.show_states:
            raise ConfigError("Sorry, there are no states to show in manual mode :/")

        filesnpaths.is_file_exists(self.tree)
        tree = filesnpaths.is_proper_newick(self.tree)

        view_data_path = os.path.abspath(self.view_data_path) if self.view_data_path else None
        self.p_meta['splits_fasta'] = os.path.abspath(self.fasta_file) if self.fasta_file else None
        self.p_meta['output_dir'] = None
        self.p_meta['views'] = {}
        self.p_meta['merged'] = True
        self.p_meta['default_view'] = 'single'

        # the tree file is the one and only clustering available in manual mode. its content
        # is stored as a single line; the file is closed as soon as it is read.
        with open(os.path.abspath(self.tree)) as newick_file:
            newick = ''.join(line.strip() for line in newick_file)

        clustering_id = '%s:unknown:unknown' % filesnpaths.get_name_from_file_path(self.tree)
        self.p_meta['default_clustering'] = clustering_id
        self.p_meta['available_clusterings'] = [clustering_id]
        self.p_meta['clusterings'] = {clustering_id: {'newick': newick}}

        self.default_view = self.p_meta['default_view']

        if self.view_data_path:
            # sanity of the view data
            filesnpaths.is_file_tab_delimited(view_data_path)
            view_data_columns = utils.get_columns_of_TAB_delim_file(view_data_path, include_first_column=True)
            if view_data_columns[0] != "contig":
                raise ConfigError("The first row of the first column of the view data file must\
                                    say 'contig', which is not the case for your view data file\
                                    ('%s'). Please make sure this is a properly formatted view data\
                                    file." % view_data_path)

            # load view data as the default view:
            self.views[self.default_view] = {'header': view_data_columns[1:],
                                             'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)}
        else:
            # no view data is provided... it is only the tree we have. we will create a mock 'view data dict'
            # here using what is in the tree: one entry per leaf, with a single 'names' column.
            ad_hoc_dict = {}
            for leaf in tree.get_leaves():
                ad_hoc_dict[leaf.name] = {'names': leaf.name}

            self.views[self.default_view] = {'header': ['names'],
                                             'dict': ad_hoc_dict}

        # list() so this is a real list regardless of what .keys() returns
        self.split_names_ordered = list(self.views[self.default_view]['dict'].keys())

        # we assume that the sample names are the header of the view data, so we might as well set it up:
        self.p_meta['samples'] = self.views[self.default_view]['header']

        # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
        # otherwise we will leave them empty
        self.splits_basic_info = {}
        self.split_sequences = None
        if self.p_meta['splits_fasta']:
            filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
            self.split_sequences = utils.get_FASTA_file_as_dictionary(self.p_meta['splits_fasta'])

            names_missing_in_FASTA = set(self.split_names_ordered) - set(self.split_sequences.keys())
            num_names_missing_in_FASTA = len(names_missing_in_FASTA)
            if num_names_missing_in_FASTA:
                raise ConfigError('Some of the names in your view data does not have corresponding entries in the\
                                    FASTA file you provided. Here is an example to one of those %d names that occur\
                                    in your data file, but not in the FASTA file: "%s"' % (num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

            # setup a mock splits_basic_info dict
            for split_id in self.split_names_ordered:
                sequence = self.split_sequences[split_id]
                self.splits_basic_info[split_id] = {'length': len(sequence),
                                                    'gc_content': utils.get_GC_content_for_sequence(sequence)}

        # create a new, empty profile database for manual operations
        if not os.path.exists(self.profile_db_path):
            profile_db = ProfileDatabase(self.profile_db_path)
            profile_db.create({'db_type': 'profile',
                               'merged': True,
                               'contigs_db_hash': None,
                               'samples': ','.join(self.p_meta['samples'])})

        # create an instance of states table
        self.states_table = TablesForStates(self.profile_db_path, anvio.__profile__version__)

        # also populate collections, if there are any
        self.collections.populate_collections_dict(self.profile_db_path, anvio.__profile__version__)

        # NOTE(review): as written this assignment does not change the title; it is kept
        # as is in case `title` is a property with a setter -- confirm and remove if not.
        if self.title:
            self.title = self.title