def sanity_check(self):
    """Verify that samples information and samples order inputs agree.

    Raises a SamplesError if the sample names described in the information
    file and the order file differ. If no default layer order is known yet,
    attempts to recover one from the first entry in `self.samples_order_dict`
    (preferring the comma-separated 'basic' order over parsing newick data).
    """
    if self.sample_names_in_samples_information_file and self.sample_names_in_samples_order_file:
        if sorted(self.sample_names_in_samples_information_file) != sorted(self.sample_names_in_samples_order_file):
            raise SamplesError('OK. Samples described in the information file and order file are not identical :/ '
                               'Here are the %d sample names in the information file: "%s", versus the %d sample '
                               'names in the orders file: "%s". And here is the difference: "%s".'
                               % (len(self.sample_names_in_samples_information_file),
                                  self.sample_names_in_samples_information_file,
                                  len(self.sample_names_in_samples_order_file),
                                  self.sample_names_in_samples_order_file,
                                  list(set(self.sample_names_in_samples_information_file) - set(self.sample_names_in_samples_order_file))))

    if not self.samples_information_default_layer_order:
        # we still don't have a default order. we will try to recover from that here
        # by looking into what we have in the samples order information
        if not len(self.samples_order_dict):
            raise SamplesError("Something is missing. Anvi'o is having hard time coming up with a default samples "
                               "order for the samples database.")

        first_order = list(self.samples_order_dict.values())[0]

        # prefer the 'basic' (comma-separated) order. only fall back to parsing
        # the newick data when there is no basic order: the previous version
        # computed the newick-based order eagerly, which crashed when the entry
        # carried a basic order but no newick data (the parser received None).
        a_basic_order = first_order['basic'].split(',') if first_order['basic'] else None

        if a_basic_order:
            self.samples_information_default_layer_order = a_basic_order
        else:
            self.samples_information_default_layer_order = utils.get_names_order_from_newick_tree(first_order['newick'])
def create_newick_file_from_matrix_file(observation_matrix_path, output_file_path,
                                        linkage=constants.linkage_method_default,
                                        distance=constants.distance_metric_default,
                                        norm='l1', progress=progress, transpose=False,
                                        items_order_file_path=None):
    """Cluster a TAB-delimited observation matrix and store the tree as newick.

    Parameters
    ----------
    observation_matrix_path : str
        Path to the TAB-delimited input matrix.
    output_file_path : str
        Where the resulting newick tree is written.
    linkage, distance : str
        Clustering parameters (validated for compatibility up front).
    norm : str
        Normalization passed through to `get_newick_from_matrix`.
    transpose : bool
        Whether to transpose the matrix before clustering.
    items_order_file_path : str, optional
        If given, the flat item order derived from the tree is written here too.
    """
    is_distance_and_linkage_compatible(distance, linkage)
    filesnpaths.is_file_exists(observation_matrix_path)
    filesnpaths.is_file_tab_delimited(observation_matrix_path)
    filesnpaths.is_output_file_writable(output_file_path)
    if items_order_file_path:
        filesnpaths.is_output_file_writable(items_order_file_path)

    id_to_sample_dict, sample_to_id_dict, header, vectors = utils.get_vectors_from_TAB_delim_matrix(observation_matrix_path, transpose=transpose)

    vectors = np.array(vectors)

    newick = get_newick_from_matrix(vectors, distance, linkage, norm, id_to_sample_dict)

    if output_file_path:
        # context managers guarantee the handles are flushed and closed
        # deterministically (the original relied on GC to close them)
        with open(output_file_path, 'w') as output:
            output.write(newick.strip() + '\n')

    if items_order_file_path:
        with open(items_order_file_path, 'w') as output:
            output.write('\n'.join(utils.get_names_order_from_newick_tree(newick)) + '\n')
def get_index_total_previous_and_next_items(self, order_name, item_name):
    """Locate `item_name` in the items order called `order_name`.

    Returns a tuple (one-based index, total number of items,
    previous item name or None, next item name or None).
    """
    # FIXME: improve performance here
    order_entry = self.interactive.p_meta['item_orders'][order_name]

    # newick orders have to be flattened into a plain name list first
    if order_entry['type'] == 'newick':
        names = utils.get_names_order_from_newick_tree(order_entry['data'])
    else:
        names = order_entry['data']

    pos = names.index(item_name)

    previous_item_name = names[pos - 1] if pos else None
    next_item_name = names[pos + 1] if pos + 1 < len(names) else None

    return pos + 1, len(names), previous_item_name, next_item_name
def get_layer_names(self, data_dict):
    """Resolve the layer names described by each entry in `data_dict`.

    Each value in `data_dict` must carry a 'data_type' and a 'data_value'.
    For 'newick' entries the names come from the leaves of the tree; for
    anything else the value is treated as a comma-separated list.

    Returns a dict mapping each data key to its list of layer names.
    Raises ConfigError when an entry cannot be parsed.
    """
    layer_names = {}

    for data_key in data_dict:
        try:
            if data_dict[data_key]['data_type'] == 'newick':
                layer_names[data_key] = utils.get_names_order_from_newick_tree(data_dict[data_key]['data_value'])
            else:
                layer_names[data_key] = [s.strip() for s in data_dict[data_key]['data_value'].split(',')]
        except Exception:
            # a bare `except:` here would also swallow KeyboardInterrupt/SystemExit
            raise ConfigError("Parsing the %s data for %s failed :/ We don't know why, because we are lazy. "
                              "Please take a look at your input data and figure out :("
                              % (data_dict[data_key]['data_type'], data_key))

    return layer_names
def create_newick_file_from_matrix_file(observation_matrix_path, output_file_path,
                                        linkage=constants.linkage_method_default,
                                        distance=constants.distance_metric_default,
                                        norm='l1', progress=progress, transpose=False,
                                        items_order_file_path=None):
    """Hierarchically cluster a TAB-delimited matrix and write the newick tree.

    Optionally also writes the flat item order derived from the tree to
    `items_order_file_path`.
    """
    # validate parameters first, then the input file, then output destinations
    is_distance_and_linkage_compatible(distance, linkage)
    filesnpaths.is_file_exists(observation_matrix_path)
    filesnpaths.is_file_tab_delimited(observation_matrix_path)
    filesnpaths.is_output_file_writable(output_file_path)
    if items_order_file_path:
        filesnpaths.is_output_file_writable(items_order_file_path)

    id_to_sample_dict, sample_to_id_dict, header, vectors = \
        utils.get_vectors_from_TAB_delim_matrix(observation_matrix_path, transpose=transpose)

    newick = get_newick_from_matrix(np.array(vectors), distance, linkage, norm, id_to_sample_dict)

    if output_file_path:
        open(output_file_path, 'w').write(newick.strip() + '\n')

    if items_order_file_path:
        open(items_order_file_path, 'w').write('\n'.join(utils.get_names_order_from_newick_tree(newick)) + '\n')
def sanity_check(self):
    """Verify that samples information and samples order inputs agree.

    Raises a SamplesError if the sample names in the information file and
    the order file differ. If no default layer order is known yet, recovers
    one from the first entry of `self.samples_order_dict`.
    """
    if self.sample_names_in_samples_information_file and self.sample_names_in_samples_order_file:
        if sorted(self.sample_names_in_samples_information_file) != sorted(self.sample_names_in_samples_order_file):
            # NOTE: the `raise SamplesError, '...'` form this used was Python 2-only
            # syntax; the call form below works under both Python 2 and 3.
            raise SamplesError('OK. Samples described in the information file and order file are not identical :/ '
                               'Here are the %d sample names in the information file: "%s", versus the %d sample '
                               'names in the orders file: "%s". And here is the difference: "%s".'
                               % (len(self.sample_names_in_samples_information_file),
                                  self.sample_names_in_samples_information_file,
                                  len(self.sample_names_in_samples_order_file),
                                  self.sample_names_in_samples_order_file,
                                  list(set(self.sample_names_in_samples_information_file) - set(self.sample_names_in_samples_order_file))))

    if not self.samples_information_default_layer_order:
        # we still don't have a default order. we will try to recover from that here
        # by looking into what we have in the samples order information
        if not len(self.samples_order_dict):
            raise SamplesError("Something is missing. Anvi'o is having hard time coming up with a default samples "
                               "order for the samples database.")

        # list() so indexing works even when .values() returns a view (Python 3)
        a_basic_order = [o['basic'] for o in list(self.samples_order_dict.values())][0]
        a_tree_order = utils.get_names_order_from_newick_tree([o['newick'] for o in list(self.samples_order_dict.values())][0])

        self.samples_information_default_layer_order = a_basic_order or a_tree_order
def __init__(self, args, external_clustering=None):
    """Set up the interactive interface state from command-line arguments.

    Reads all relevant attributes from `args`, initializes the contigs
    superclass, loads samples information, dispatches into one of the
    display modes (manual / refine / collection / full), and finally
    establishes the master split name order from the default clustering.

    Raises ConfigError for incompatible argument combinations or when no
    hierarchical clustering is available for the interface.
    """
    self.args = args
    self.views = {}
    self.states_table = None
    self.p_meta = {}
    self.title = 'Unknown Project'

    # safe accessor: returns None for arguments that were never declared
    A = lambda x: args.__dict__[x] if x in args.__dict__ else None
    self.mode = A('mode')
    self.profile_db_path = A('profile_db')
    self.contigs_db_path = A('contigs_db')
    self.collection_name = A('collection_name')
    self.manual_mode = A('manual_mode')
    self.split_hmm_layers = A('split_hmm_layers')
    self.taxonomic_level = A('taxonomic_level')
    self.additional_layers_path = A('additional_layers')
    self.additional_view_path = A('additional_view')
    self.samples_information_db_path = A('samples_information_db')
    self.view = A('view')
    self.fasta_file = A('fasta_file')
    self.view_data_path = A('view_data')
    self.tree = A('tree')
    self.title = A('title')
    self.output_dir = A('output_dir')
    self.show_views = A('show_views')
    self.state_autoload = A('state_autoload')
    self.collection_autoload = A('collection_autoload')
    self.show_states = A('show_states')
    self.skip_check_names = A('skip_check_names')
    self.list_collections = A('list_collections')

    self.distance = A('distance') or constants.distance_metric_default
    self.linkage = A('linkage') or constants.linkage_method_default

    # make sure early on that both the distance and linkage is OK.
    clustering.is_distance_and_linkage_compatible(self.distance, self.linkage)

    self.split_names_ordered = None
    self.additional_layers = None
    self.auxiliary_profile_data_available = False

    self.samples_information_dict = {}
    self.samples_order_dict = {}
    self.samples_information_default_layer_order = {}

    # make sure the mode will be set properly.
    # NOTE: the `raise X, msg` statements this block used were Python 2-only
    # syntax; the call form works under both Python 2 and 3.
    if self.collection_name and self.manual_mode:
        raise ConfigError("You can't anvi-interactive in manual mode with a collection name.")

    self.external_clustering = external_clustering

    self.collections = ccollections.Collections()

    ContigsSuperclass.__init__(self, self.args)
    self.init_splits_taxonomy(self.taxonomic_level)

    if self.samples_information_db_path:
        samples_information_db = SamplesInformationDatabase(self.samples_information_db_path)
        self.samples_information_dict, self.samples_order_dict = samples_information_db.get_samples_information_and_order_dicts()
        self.samples_information_default_layer_order = samples_information_db.get_samples_information_default_layer_order()
        samples_information_db.disconnect()

    if self.contigs_db_path:
        self.completeness = Completeness(self.contigs_db_path)
        self.collections.populate_collections_dict(self.contigs_db_path, anvio.__contigs__version__)
    else:
        self.completeness = None

    if 'skip_init_functions' in args and not args.skip_init_functions:
        self.init_functions()

    # make sure we are not dealing with apples and oranges here.
    if self.contigs_db_path and self.profile_db_path:
        is_profile_db_and_contigs_db_compatible(self.profile_db_path, self.contigs_db_path)

    self.P = lambda x: os.path.join(self.p_meta['output_dir'], x)
    self.cwd = os.getcwd()

    # here is where the big deal stuff takes place:
    if not self.mode and self.manual_mode:
        self.mode = 'manual'
        self.run.info('Mode', self.mode, mc='red')
        self.load_manual_mode(args)
    elif self.mode == 'refine':
        self.load_full_mode(args)
    elif self.collection_name or self.list_collections:
        self.mode = 'collection'
        self.run.info('Mode', self.mode, mc='green')
        self.load_collection_mode(args)
    else:
        self.mode = 'full'
        self.load_full_mode(args)

    # make sure the samples information database, if there is one, is in fact compatible with the profile database
    # the reason we are doing this here is because when we are in 'self.manual_mode', the self.p_meta['samples'] is
    # being filled within the self.load_manual_mode function based on the headers of the view data.
    if self.profile_db_path and self.samples_information_db_path:
        is_profile_db_and_samples_db_compatible(self.profile_db_path, self.samples_information_db_path,
                                                manual_mode_exception=self.manual_mode)

    if self.external_clustering:
        self.p_meta['clusterings'] = self.clusterings = self.external_clustering['clusterings']
        # list() so this remains a real list under Python 3, where keys() is a view
        self.p_meta['available_clusterings'] = list(self.clusterings.keys())
        self.p_meta['default_clustering'] = self.external_clustering['default_clustering']

    if not self.state_autoload and 'default' in self.states_table.states:
        self.state_autoload = 'default'

    if not self.collection_autoload and 'default' in self.collections.collections_dict:
        self.collection_autoload = 'default'

    if not self.p_meta['clusterings']:
        if self.p_meta['merged']:
            raise ConfigError("This merged profile database does not seem to have any hierarchical clustering "
                              "of splits that is required by the interactive interface. It may have been generated "
                              "by anvi-merge with the `--skip-hierarchical-clustering` flag, or hierarchical "
                              "clustering step may have been skipped by anvi-merge because you had too many splits "
                              "to get the clustering in a reasonable amount of time. Please read the help menu for "
                              "anvi-merge, and/or refer to the tutorial: "
                              "http://merenlab.org/2015/05/01/anvio-tutorial/#clustering-during-merging")
        else:
            raise ConfigError("This single profile database does not seem to have any hierarchical clustering "
                              "that is required by the interactive interface. You must use `--cluster-contigs` "
                              "flag for single profiles to access to this functionality. Please read the help "
                              "menu for anvi-profile, and/or refer to the tutorial.")

    # self.split_names_ordered is going to be the 'master' names list. everything else is going to
    # need to match these names:
    self.split_names_ordered = utils.get_names_order_from_newick_tree(self.p_meta['clusterings'][self.p_meta['default_clustering']]['newick'])

    # now we know what splits we are interested in (self.split_names_ordered), we can get rid of all the
    # unnecessary splits stored in views dicts.
    self.prune_view_dicts()

    # if there are any HMM search results in the contigs database other than 'singlecopy' sources,
    # we would like to visualize them as additional layers. following function is inherited from
    # Contigs DB superclass and will fill self.hmm_searches_dict if appropriate data is found in
    # search tables:
    if self.mode == 'full':
        self.init_non_singlecopy_gene_hmm_sources(self.split_names_ordered,
                                                  return_each_gene_as_a_layer=self.split_hmm_layers)

    if self.additional_layers_path:
        filesnpaths.is_file_tab_delimited(self.additional_layers_path)
        self.additional_layers = self.additional_layers_path

    self.check_names_consistency()
    self.convert_view_data_into_json()
def __init__(self, args, external_clustering=None):
    """Set up the interactive interface state from command-line arguments.

    Older variant of the interactive constructor: reads attributes from
    `args`, initializes the contigs superclass, loads samples information,
    dispatches into one of the display modes, and establishes the master
    split name order from the default clustering.

    Raises ConfigError for incompatible argument combinations or when no
    hierarchical clustering is available for the interface.
    """
    self.args = args
    self.views = {}
    self.states_table = None
    self.p_meta = {}
    self.title = 'Unknown Project'

    # safe accessor: returns None for arguments that were never declared
    A = lambda x: args.__dict__[x] if x in args.__dict__ else None
    self.mode = A('mode')
    self.profile_db_path = A('profile_db')
    self.contigs_db_path = A('contigs_db')
    self.collection_name = A('collection_name')
    self.manual_mode = A('manual_mode')
    self.split_hmm_layers = A('split_hmm_layers')
    self.additional_layers_path = A('additional_layers')
    self.additional_view_path = A('additional_view')
    self.samples_information_db_path = A('samples_information_db')
    self.view = A('view')
    self.fasta_file = A('fasta_file')
    self.view_data_path = A('view_data')
    self.tree = A('tree')
    self.title = A('title')
    self.output_dir = A('output_dir')
    self.show_views = A('show_views')
    self.state = A('state')
    self.show_states = A('show_states')
    self.skip_check_names = A('skip_check_names')
    self.list_collections = A('list_collections')

    self.distance = A('distance') or constants.distance_metric_default
    self.linkage = A('linkage') or constants.linkage_method_default

    # make sure early on that both the distance and linkage is OK.
    clustering.is_distance_and_linkage_compatible(self.distance, self.linkage)

    self.split_names_ordered = None
    self.additional_layers = None
    self.auxiliary_profile_data_available = False

    self.samples_information_dict = {}
    self.samples_order_dict = {}
    self.samples_information_default_layer_order = {}

    # make sure the mode will be set properly.
    # NOTE: the `raise X, msg` statements this block used were Python 2-only
    # syntax; the call form works under both Python 2 and 3.
    if self.collection_name and self.manual_mode:
        raise ConfigError("You can't anvi-interactive in manual mode with a collection name.")

    self.external_clustering = external_clustering

    self.collections = ccollections.Collections()

    ContigsSuperclass.__init__(self, self.args)
    self.init_splits_taxonomy()

    if self.samples_information_db_path:
        samples_information_db = SamplesInformationDatabase(self.samples_information_db_path)
        self.samples_information_dict, self.samples_order_dict = samples_information_db.get_samples_information_and_order_dicts()
        self.samples_information_default_layer_order = samples_information_db.get_samples_information_default_layer_order()
        samples_information_db.disconnect()

    if self.contigs_db_path:
        self.completeness = Completeness(self.contigs_db_path)
        self.collections.populate_collections_dict(self.contigs_db_path, anvio.__contigs__version__)
    else:
        self.completeness = None

    if 'skip_init_functions' in args and not args.skip_init_functions:
        self.init_functions()

    # make sure we are not dealing with apples and oranges here.
    if self.contigs_db_path and self.profile_db_path:
        is_profile_db_and_contigs_db_compatible(self.profile_db_path, self.contigs_db_path)

    self.P = lambda x: os.path.join(self.p_meta['output_dir'], x)
    self.cwd = os.getcwd()

    # here is where the big deal stuff takes place:
    if not self.mode and self.manual_mode:
        self.mode = 'manual'
        self.run.info('Mode', self.mode, mc='red')
        self.load_manual_mode(args)
    elif self.mode == 'refine':
        self.load_full_mode(args)
    elif self.collection_name or self.list_collections:
        self.mode = 'collection'
        self.run.info('Mode', self.mode, mc='green')
        self.load_collection_mode(args)
    else:
        self.mode = 'full'
        self.load_full_mode(args)

    # make sure the samples information database, if there is one, is in fact compatible with the profile database
    # the reason we are doing this here is because when we are in 'self.manual_mode', the self.p_meta['samples'] is
    # being filled within the self.load_manual_mode function based on the headers of the view data.
    if self.profile_db_path and self.samples_information_db_path:
        is_profile_db_and_samples_db_compatible(self.profile_db_path, self.samples_information_db_path,
                                                manual_mode_exception=self.manual_mode)

    if self.external_clustering:
        self.p_meta['clusterings'] = self.clusterings = self.external_clustering['clusterings']
        # list() so this remains a real list under Python 3, where keys() is a view
        self.p_meta['available_clusterings'] = list(self.clusterings.keys())
        self.p_meta['default_clustering'] = self.external_clustering['default_clustering']

    if not self.state and 'default' in self.states_table.states:
        self.state = 'default'

    if not self.p_meta['clusterings']:
        if self.p_meta['merged']:
            raise ConfigError("This merged profile database does not seem to have any hierarchical clustering "
                              "of splits that is required by the interactive interface. It may have been generated "
                              "by anvi-merge with the `--skip-hierarchical-clustering` flag, or hierarchical "
                              "clustering step may have been skipped by anvi-merge because you had too many splits "
                              "to get the clustering in a reasonable amount of time. Please read the help menu for "
                              "anvi-merge, and/or refer to the tutorial: "
                              "http://merenlab.org/2015/05/01/anvio-tutorial/#clustering-during-merging")
        else:
            raise ConfigError("This single profile database does not seem to have any hierarchical clustering "
                              "that is required by the interactive interface. You must use `--cluster-contigs` "
                              "flag for single profiles to access to this functionality. Please read the help "
                              "menu for anvi-profile, and/or refer to the tutorial.")

    # self.split_names_ordered is going to be the 'master' names list. everything else is going to
    # need to match these names:
    self.split_names_ordered = utils.get_names_order_from_newick_tree(self.p_meta['clusterings'][self.p_meta['default_clustering']]['newick'])

    # now we know what splits we are interested in (self.split_names_ordered), we can get rid of all the
    # unnecessary splits stored in views dicts.
    self.prune_view_dicts()

    # if there are any HMM search results in the contigs database other than 'singlecopy' sources,
    # we would like to visualize them as additional layers. following function is inherited from
    # Contigs DB superclass and will fill self.hmm_searches_dict if appropriate data is found in
    # search tables:
    if self.mode == 'full':
        self.init_non_singlecopy_gene_hmm_sources(self.split_names_ordered,
                                                  return_each_gene_as_a_layer=self.split_hmm_layers)

    if self.additional_layers_path:
        filesnpaths.is_file_tab_delimited(self.additional_layers_path)
        self.additional_layers = self.additional_layers_path

    self.check_names_consistency()
    self.convert_view_data_into_json()
def handle(self, *args, **options):
    """Migrate a legacy anvi'server sqlite database into the new Django models.

    Three phases: (1) copy rows from the legacy `users` table into Django
    User/UserProfile objects, (2) move each user's project files into the new
    data directory, (3) re-create each legacy project (renaming its files,
    rebuilding the samples database, counting leaves/layers, and migrating
    old share links). Failures in phase 3 are reported per-project and the
    partially-migrated files are removed.
    """
    conn = sqlite3.connect(options['userdb_path'][0])

    # legacy schema, for reference:
    # CREATE TABLE users (
    #     0 login TEXT PRIMARY KEY, 1 firstname TEXT, 2 lastname TEXT, 3 email TEXT,
    #     4 password TEXT, 5 path TEXT, 6 token TEXT, 7 accepted INTEGER, 8 project TEXT,
    #     9 affiliation TEXT, 10 ip TEXT, 11 clearance TEXT, 12 date TEXT, 13 visit TEXT)

    print("Migrating users table...")
    user_paths = {}
    for row in conn.execute('SELECT * FROM users;'):
        username = sanitize_username(row[0])
        password = "******" + row[4]
        email = row[3]
        is_active = True
        is_superuser = True if row[11] == 'admin' else False
        user_paths[row[5]] = username
        date_joined = datetime.strptime(row[12], "%Y-%m-%d").replace(tzinfo=timezone.utc)

        newuser = User(username=username, password=password, email=email, is_active=is_active,
                       is_superuser=is_superuser, is_staff=is_superuser, date_joined=date_joined)
        newuser.save()

        fullname = "%s %s" % (row[1], row[2])
        institution = row[9]
        if len(fullname) < 2:
            fullname = None

        newuser_profile = UserProfile(user=newuser, fullname=fullname, orcid=None, institution=institution)
        newuser_profile.save()
    print(" - Successful.")

    print("Moving project files... ")
    for path in user_paths:
        username = user_paths[path]
        src = os.path.join(options['userfiles_path'][0], path)  # old user project dir
        dst = os.path.join(settings.USER_DATA_DIR, username)    # new user project dir
        try:
            shutil.copytree(src, dst)
        except FileNotFoundError:
            # if user path does not exist, create an empty dir for the user
            os.makedirs(dst)
    print(" - Successful")

    # (fixed typo in the original message: "Migratins")
    print("Migrating project table... ")
    # legacy schema:
    # CREATE TABLE projects (
    #     0 name TEXT PRIMARY KEY, 1 path TEXT, 2 user TEXT, 3 description TEXT)
    for row in conn.execute('SELECT * FROM projects;'):
        name = row[0]
        slug = slugify(name).replace('-', '_')
        path = row[1]
        username = sanitize_username(row[2])
        description = row[3]

        # rename project files from legacy names to the new naming scheme
        fileTypes_old = ['treeFile', 'dataFile', 'fastaFile', 'samplesOrderFile', 'samplesInformationFile']
        fileTypes_new = ['tree.txt', 'data.txt', 'fasta.fa', 'samples-order.txt', 'samples-info.txt']
        for i in range(5):
            try:
                os.rename(os.path.join(settings.USER_DATA_DIR, username, path, fileTypes_old[i]),
                          os.path.join(settings.USER_DATA_DIR, username, path, fileTypes_new[i]))
            except OSError:
                # the legacy file simply may not exist for this project
                pass

        try:
            if not description or not len(description) > 0:
                description = ""

            project = Project(name=name, slug=slug, user=User.objects.get(username=username), secret=path)

            # NOTE: the original code had these two assignments swapped (the
            # samples *information* file ended up in `samples_order` and vice
            # versa), so SamplesInformationDatabase.create received the files
            # in the wrong order.
            samples_order = project.get_file_path('samples-order.txt', default=None)
            samples_info = project.get_file_path('samples-info.txt', default=None)
            if (samples_info or samples_order) and not project.get_file_path('samples.db', default=None):
                s = dbops.SamplesInformationDatabase(project.get_file_path('samples.db', dont_check_exists=True), quiet=True)
                s.create(samples_order, samples_info)

            interactive = project.get_interactive()

            # try to get number of leaves
            try:
                leaves = get_names_order_from_newick_tree(project.get_file_path('tree.txt', default=None))
                project.num_leaves = len(leaves) if leaves != [''] else 0
            except Exception:
                project.num_leaves = 0

            # try to get number of layers
            try:
                project.num_layers = len(interactive.views['single'][0]) - 1  # <- -1 because first column is contigs
            except Exception:
                project.num_layers = 0

            # store description
            dbops.update_description_in_db(project.get_file_path('profile.db', default=None), description or '')

            project.synchronize_num_states()
            project.synchronize_num_collections()

            project.save()

            # try to migrate old links. use a parameterized query instead of
            # interpolating the project name into the SQL string.
            for row_links in conn.execute('SELECT * FROM views WHERE project LIKE ?;', (name,)):
                old_link = OldLinks(name=row_links[0],
                                    user=sanitize_username(row_links[1]),
                                    project=Project.objects.filter(name=row_links[2],
                                                                   user__username=sanitize_username(row_links[1]))[0],
                                    is_public=True if row_links[3] == 1 else False,
                                    token=row_links[4])
                old_link.save()

                project_link = ProjectLink(project=old_link.project, link=old_link.token)
                project_link.save()
        except Exception as e:
            print(username + " " + name + " " + path + " failed to create project, here is the exception " + str(e))
            shutil.rmtree(os.path.join(settings.USER_DATA_DIR, username, path))

    print(" - Successful")

    # the original never closed the legacy database connection
    conn.close()
def load_manual_mode(self, args):
    """Initialize the interactive display in 'manual' mode.

    Manual mode works without a contigs database: the display is driven by a
    user-provided newick tree and/or a TAB-delimited view data file. A profile
    database path is still required, but it may point to a non-existing file,
    in which case an empty one is created to store display states and
    collections. Fills `self.p_meta`, `self.views`, `self.splits_basic_info`
    and related attributes.

    Raises ConfigError when the combination of inputs does not make sense.
    """
    if self.contigs_db_path:
        raise ConfigError("When you want to use the interactive interface in manual mode, you must\
                           not use a contigs database.")

    if not self.profile_db_path:
        raise ConfigError("Even when you want to use the interactive interface in manual mode, you need\
                           to provide a profile database path. But you DO NOT need an already existing\
                           profile database, since anvi'o will generate an empty one for you. The profile\
                           database in this mode only used to read or store the 'state' of the display\
                           for visualization purposes, or to allow you to create and store collections.")

    # if the user is using an existing profile database, we need to make sure that it is not associated
    # with a contigs database, since it would mean that it is a full anvi'o profile database and should
    # not be included in manual operations.
    if filesnpaths.is_file_exists(self.profile_db_path, dont_raise=True):
        profile_db = ProfileDatabase(self.profile_db_path)

        if profile_db.meta['contigs_db_hash']:
            raise ConfigError("Well. It seems the profile database is associated with a contigs database,\
                               which means using it in manual mode is not the best way to use it. Probably\
                               what you wanted to do is to let the manual mode create a new profile database\
                               for you. Simply type in a new profile database path (it can be a file name\
                               that doesn't exist).")

    if not self.tree and not self.view_data_path:
        raise ConfigError("You must be joking Mr. Feynman. No tree file, and no data file? What is it that\
                           anvi'o supposed to visualize? :(")

    if not self.tree:
        self.run.warning("You haven't declared a tree file. Anvi'o will do its best to come up with an\
                          organization of your items.")

    if self.view:
        raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                           in manual mode")

    if self.show_views:
        raise ConfigError("Sorry, there are no views to show in manual mode :/")

    if self.show_states:
        raise ConfigError("Sorry, there are no states to show in manual mode :/")

    # item names come from the tree leaves when a tree is given, otherwise from
    # the first column of the view data file (skipping the header row)
    if self.tree:
        filesnpaths.is_file_exists(self.tree)
        newick_tree_text = ''.join([l.strip() for l in open(os.path.abspath(self.tree)).readlines()])
        item_names = utils.get_names_order_from_newick_tree(newick_tree_text)
    else:
        item_names = utils.get_column_data_from_TAB_delim_file(self.view_data_path, column_indices=[0])[0][1:]

    # try to convert item names into integer values for proper sorting later. it's OK if it does
    # not work.
    try:
        item_names = [int(n) for n in item_names]
    except:
        pass

    view_data_path = os.path.abspath(self.view_data_path) if self.view_data_path else None
    self.p_meta['splits_fasta'] = os.path.abspath(self.fasta_file) if self.fasta_file else None
    self.p_meta['output_dir'] = None
    self.p_meta['views'] = {}
    self.p_meta['merged'] = True
    self.p_meta['default_view'] = 'single'
    self.default_view = self.p_meta['default_view']

    # set some default organizations of data:
    self.p_meta['clusterings'] = {'Alphabetical_(reverse):none:none': {'basic': sorted(item_names)},
                                  'Alphabetical:none:none': {'basic': sorted(item_names, reverse=True)}}
    self.p_meta['available_clusterings'] = ['Alphabetical_(reverse):none:none', 'Alphabetical:none:none']
    self.p_meta['default_clustering'] = self.p_meta['available_clusterings'][0]

    # if we have a tree, let's make arrangements for it:
    if self.tree:
        clustering_id = '%s:unknown:unknown' % filesnpaths.get_name_from_file_path(self.tree)
        self.p_meta['default_clustering'] = clustering_id
        self.p_meta['available_clusterings'].append(clustering_id)
        self.p_meta['clusterings'][clustering_id] = {'newick': newick_tree_text}

    if self.view_data_path:
        # sanity of the view data
        filesnpaths.is_file_tab_delimited(view_data_path)
        view_data_columns = utils.get_columns_of_TAB_delim_file(view_data_path, include_first_column=True)

        # load view data as the default view:
        self.views[self.default_view] = {'header': view_data_columns[1:],
                                         'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)}
    else:
        # no view data is provided... it is only the tree we have. we will creaet a mock 'view data dict'
        # here using what is in the tree.
        ad_hoc_dict = {}
        for item in item_names:
            ad_hoc_dict[item] = {'names': item}

        self.views[self.default_view] = {'header': ['names'], 'dict': ad_hoc_dict}

    self.displayed_item_names_ordered = list(self.views[self.default_view]['dict'].keys())

    # we assume that the sample names are the header of the view data, so we might as well set it up:
    self.p_meta['samples'] = self.views[self.default_view]['header']

    # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
    # otherwise we will leave them empty
    self.splits_basic_info = {}
    self.split_sequences = None
    if self.p_meta['splits_fasta']:
        filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
        self.split_sequences = utils.get_FASTA_file_as_dictionary(self.p_meta['splits_fasta'])

        names_missing_in_FASTA = set(self.displayed_item_names_ordered) - set(self.split_sequences.keys())
        num_names_missing_in_FASTA = len(names_missing_in_FASTA)
        if num_names_missing_in_FASTA:
            raise ConfigError('Some of the names in your view data does not have corresponding entries in the\
                               FASTA file you provided. Here is an example to one of those %d names that occur\
                               in your data file, but not in the FASTA file: "%s"' % (num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

        # setup a mock splits_basic_info dict
        for split_id in self.displayed_item_names_ordered:
            self.splits_basic_info[split_id] = {'length': len(self.split_sequences[split_id]),
                                                'gc_content': utils.get_GC_content_for_sequence(self.split_sequences[split_id])}

    # create a new, empty profile database for manual operations
    if not os.path.exists(self.profile_db_path):
        profile_db = ProfileDatabase(self.profile_db_path)
        profile_db.create({'db_type': 'profile',
                           'merged': True,
                           'contigs_db_hash': None,
                           'samples': ','.join(self.p_meta['samples'])})

    # create an instance of states table
    self.states_table = TablesForStates(self.profile_db_path)

    # also populate collections, if there are any
    self.collections.populate_collections_dict(self.profile_db_path)

    # read description from self table, if it is not available get_description function will return placeholder text
    self.p_meta['description'] = get_description_in_db(self.profile_db_path)

    if self.title:
        self.title = self.title