def sanity_check(self):
    """Verify that samples information and samples order inputs agree.

    Raises a SamplesError if the sample names described in the information
    file and the order file differ. If no default layer order is known yet,
    attempts to recover one from the first entry in `self.samples_order_dict`
    (preferring the comma-separated 'basic' order over parsing newick data).
    """
    if self.sample_names_in_samples_information_file and self.sample_names_in_samples_order_file:
        if sorted(self.sample_names_in_samples_information_file) != sorted(self.sample_names_in_samples_order_file):
            raise SamplesError('OK. Samples described in the information file and order file are not identical :/ '
                               'Here are the %d sample names in the information file: "%s", versus the %d sample '
                               'names in the orders file: "%s". And here is the difference: "%s".'
                               % (len(self.sample_names_in_samples_information_file),
                                  self.sample_names_in_samples_information_file,
                                  len(self.sample_names_in_samples_order_file),
                                  self.sample_names_in_samples_order_file,
                                  list(set(self.sample_names_in_samples_information_file) - set(self.sample_names_in_samples_order_file))))

    if not self.samples_information_default_layer_order:
        # we still don't have a default order. we will try to recover from that here
        # by looking into what we have in the samples order information
        if not len(self.samples_order_dict):
            raise SamplesError("Something is missing. Anvi'o is having hard time coming up with a default samples "
                               "order for the samples database.")

        first_order = list(self.samples_order_dict.values())[0]

        # prefer the 'basic' (comma-separated) order. only fall back to parsing
        # the newick data when there is no basic order: the previous version
        # computed the newick-based order eagerly, which crashed when the entry
        # carried a basic order but no newick data (the parser received None).
        a_basic_order = first_order['basic'].split(',') if first_order['basic'] else None

        if a_basic_order:
            self.samples_information_default_layer_order = a_basic_order
        else:
            self.samples_information_default_layer_order = utils.get_names_order_from_newick_tree(first_order['newick'])
def create_newick_file_from_matrix_file(observation_matrix_path, output_file_path,
                                        linkage=constants.linkage_method_default,
                                        distance=constants.distance_metric_default,
                                        norm='l1', progress=progress, transpose=False,
                                        items_order_file_path=None):
    """Cluster a TAB-delimited observation matrix and store the tree as newick.

    Parameters
    ----------
    observation_matrix_path : str
        Path to the TAB-delimited input matrix.
    output_file_path : str
        Where the resulting newick tree is written.
    linkage, distance : str
        Clustering parameters (validated for compatibility up front).
    norm : str
        Normalization passed through to `get_newick_from_matrix`.
    transpose : bool
        Whether to transpose the matrix before clustering.
    items_order_file_path : str, optional
        If given, the flat item order derived from the tree is written here too.
    """
    is_distance_and_linkage_compatible(distance, linkage)
    filesnpaths.is_file_exists(observation_matrix_path)
    filesnpaths.is_file_tab_delimited(observation_matrix_path)
    filesnpaths.is_output_file_writable(output_file_path)
    if items_order_file_path:
        filesnpaths.is_output_file_writable(items_order_file_path)

    id_to_sample_dict, sample_to_id_dict, header, vectors = utils.get_vectors_from_TAB_delim_matrix(observation_matrix_path, transpose=transpose)

    vectors = np.array(vectors)

    newick = get_newick_from_matrix(vectors, distance, linkage, norm, id_to_sample_dict)

    if output_file_path:
        # context managers guarantee the handles are flushed and closed
        # deterministically (the original relied on GC to close them)
        with open(output_file_path, 'w') as output:
            output.write(newick.strip() + '\n')

    if items_order_file_path:
        with open(items_order_file_path, 'w') as output:
            output.write('\n'.join(utils.get_names_order_from_newick_tree(newick)) + '\n')
def get_index_total_previous_and_next_items(self, order_name, item_name):
    """Locate `item_name` in the items order called `order_name`.

    Returns a tuple (one-based index, total number of items,
    previous item name or None, next item name or None).
    """
    # FIXME: improve performance here
    order_entry = self.interactive.p_meta['item_orders'][order_name]

    # newick orders have to be flattened into a plain name list first
    if order_entry['type'] == 'newick':
        names = utils.get_names_order_from_newick_tree(order_entry['data'])
    else:
        names = order_entry['data']

    pos = names.index(item_name)

    previous_item_name = names[pos - 1] if pos else None
    next_item_name = names[pos + 1] if pos + 1 < len(names) else None

    return pos + 1, len(names), previous_item_name, next_item_name
def get_layer_names(self, data_dict):
    """Resolve the layer names described by each entry in `data_dict`.

    Each value in `data_dict` must carry a 'data_type' and a 'data_value'.
    For 'newick' entries the names come from the leaves of the tree; for
    anything else the value is treated as a comma-separated list.

    Returns a dict mapping each data key to its list of layer names.
    Raises ConfigError when an entry cannot be parsed.
    """
    layer_names = {}

    for data_key in data_dict:
        try:
            if data_dict[data_key]['data_type'] == 'newick':
                layer_names[data_key] = utils.get_names_order_from_newick_tree(data_dict[data_key]['data_value'])
            else:
                layer_names[data_key] = [s.strip() for s in data_dict[data_key]['data_value'].split(',')]
        except Exception:
            # a bare `except:` here would also swallow KeyboardInterrupt/SystemExit
            raise ConfigError("Parsing the %s data for %s failed :/ We don't know why, because we are lazy. "
                              "Please take a look at your input data and figure out :("
                              % (data_dict[data_key]['data_type'], data_key))

    return layer_names
def create_newick_file_from_matrix_file(observation_matrix_path, output_file_path,
                                        linkage=constants.linkage_method_default,
                                        distance=constants.distance_metric_default,
                                        norm='l1', progress=progress, transpose=False,
                                        items_order_file_path=None):
    """Hierarchically cluster a TAB-delimited matrix and write the newick tree.

    Optionally also writes the flat item order derived from the tree to
    `items_order_file_path`.
    """
    # validate parameters first, then the input file, then output destinations
    is_distance_and_linkage_compatible(distance, linkage)
    filesnpaths.is_file_exists(observation_matrix_path)
    filesnpaths.is_file_tab_delimited(observation_matrix_path)
    filesnpaths.is_output_file_writable(output_file_path)
    if items_order_file_path:
        filesnpaths.is_output_file_writable(items_order_file_path)

    id_to_sample_dict, sample_to_id_dict, header, vectors = \
        utils.get_vectors_from_TAB_delim_matrix(observation_matrix_path, transpose=transpose)

    newick = get_newick_from_matrix(np.array(vectors), distance, linkage, norm, id_to_sample_dict)

    if output_file_path:
        open(output_file_path, 'w').write(newick.strip() + '\n')

    if items_order_file_path:
        open(items_order_file_path, 'w').write('\n'.join(utils.get_names_order_from_newick_tree(newick)) + '\n')
def sanity_check(self):
    """Verify that samples information and samples order inputs agree.

    Raises a SamplesError if the sample names in the information file and
    the order file differ. If no default layer order is known yet, recovers
    one from the first entry of `self.samples_order_dict`.
    """
    if self.sample_names_in_samples_information_file and self.sample_names_in_samples_order_file:
        if sorted(self.sample_names_in_samples_information_file) != sorted(self.sample_names_in_samples_order_file):
            # NOTE: the `raise SamplesError, '...'` form this used was Python 2-only
            # syntax; the call form below works under both Python 2 and 3.
            raise SamplesError('OK. Samples described in the information file and order file are not identical :/ '
                               'Here are the %d sample names in the information file: "%s", versus the %d sample '
                               'names in the orders file: "%s". And here is the difference: "%s".'
                               % (len(self.sample_names_in_samples_information_file),
                                  self.sample_names_in_samples_information_file,
                                  len(self.sample_names_in_samples_order_file),
                                  self.sample_names_in_samples_order_file,
                                  list(set(self.sample_names_in_samples_information_file) - set(self.sample_names_in_samples_order_file))))

    if not self.samples_information_default_layer_order:
        # we still don't have a default order. we will try to recover from that here
        # by looking into what we have in the samples order information
        if not len(self.samples_order_dict):
            raise SamplesError("Something is missing. Anvi'o is having hard time coming up with a default samples "
                               "order for the samples database.")

        # list() so indexing works even when .values() returns a view (Python 3)
        a_basic_order = [o['basic'] for o in list(self.samples_order_dict.values())][0]
        a_tree_order = utils.get_names_order_from_newick_tree([o['newick'] for o in list(self.samples_order_dict.values())][0])

        self.samples_information_default_layer_order = a_basic_order or a_tree_order
def __init__(self, args, external_clustering=None):
    """Set up the interactive interface state from command-line arguments.

    Reads all relevant attributes from `args`, initializes the contigs
    superclass, loads samples information, dispatches into one of the
    display modes (manual / refine / collection / full), and finally
    establishes the master split name order from the default clustering.

    Raises ConfigError for incompatible argument combinations or when no
    hierarchical clustering is available for the interface.
    """
    self.args = args
    self.views = {}
    self.states_table = None
    self.p_meta = {}
    self.title = 'Unknown Project'

    # safe accessor: returns None for arguments that were never declared
    A = lambda x: args.__dict__[x] if x in args.__dict__ else None
    self.mode = A('mode')
    self.profile_db_path = A('profile_db')
    self.contigs_db_path = A('contigs_db')
    self.collection_name = A('collection_name')
    self.manual_mode = A('manual_mode')
    self.split_hmm_layers = A('split_hmm_layers')
    self.taxonomic_level = A('taxonomic_level')
    self.additional_layers_path = A('additional_layers')
    self.additional_view_path = A('additional_view')
    self.samples_information_db_path = A('samples_information_db')
    self.view = A('view')
    self.fasta_file = A('fasta_file')
    self.view_data_path = A('view_data')
    self.tree = A('tree')
    self.title = A('title')
    self.output_dir = A('output_dir')
    self.show_views = A('show_views')
    self.state_autoload = A('state_autoload')
    self.collection_autoload = A('collection_autoload')
    self.show_states = A('show_states')
    self.skip_check_names = A('skip_check_names')
    self.list_collections = A('list_collections')

    self.distance = A('distance') or constants.distance_metric_default
    self.linkage = A('linkage') or constants.linkage_method_default

    # make sure early on that both the distance and linkage is OK.
    clustering.is_distance_and_linkage_compatible(self.distance, self.linkage)

    self.split_names_ordered = None
    self.additional_layers = None
    self.auxiliary_profile_data_available = False

    self.samples_information_dict = {}
    self.samples_order_dict = {}
    self.samples_information_default_layer_order = {}

    # make sure the mode will be set properly.
    # NOTE: the `raise X, msg` statements this block used were Python 2-only
    # syntax; the call form works under both Python 2 and 3.
    if self.collection_name and self.manual_mode:
        raise ConfigError("You can't anvi-interactive in manual mode with a collection name.")

    self.external_clustering = external_clustering

    self.collections = ccollections.Collections()

    ContigsSuperclass.__init__(self, self.args)
    self.init_splits_taxonomy(self.taxonomic_level)

    if self.samples_information_db_path:
        samples_information_db = SamplesInformationDatabase(self.samples_information_db_path)
        self.samples_information_dict, self.samples_order_dict = samples_information_db.get_samples_information_and_order_dicts()
        self.samples_information_default_layer_order = samples_information_db.get_samples_information_default_layer_order()
        samples_information_db.disconnect()

    if self.contigs_db_path:
        self.completeness = Completeness(self.contigs_db_path)
        self.collections.populate_collections_dict(self.contigs_db_path, anvio.__contigs__version__)
    else:
        self.completeness = None

    if 'skip_init_functions' in args and not args.skip_init_functions:
        self.init_functions()

    # make sure we are not dealing with apples and oranges here.
    if self.contigs_db_path and self.profile_db_path:
        is_profile_db_and_contigs_db_compatible(self.profile_db_path, self.contigs_db_path)

    self.P = lambda x: os.path.join(self.p_meta['output_dir'], x)
    self.cwd = os.getcwd()

    # here is where the big deal stuff takes place:
    if not self.mode and self.manual_mode:
        self.mode = 'manual'
        self.run.info('Mode', self.mode, mc='red')
        self.load_manual_mode(args)
    elif self.mode == 'refine':
        self.load_full_mode(args)
    elif self.collection_name or self.list_collections:
        self.mode = 'collection'
        self.run.info('Mode', self.mode, mc='green')
        self.load_collection_mode(args)
    else:
        self.mode = 'full'
        self.load_full_mode(args)

    # make sure the samples information database, if there is one, is in fact compatible with the profile database
    # the reason we are doing this here is because when we are in 'self.manual_mode', the self.p_meta['samples'] is
    # being filled within the self.load_manual_mode function based on the headers of the view data.
    if self.profile_db_path and self.samples_information_db_path:
        is_profile_db_and_samples_db_compatible(self.profile_db_path, self.samples_information_db_path,
                                                manual_mode_exception=self.manual_mode)

    if self.external_clustering:
        self.p_meta['clusterings'] = self.clusterings = self.external_clustering['clusterings']
        # list() so this remains a real list under Python 3, where keys() is a view
        self.p_meta['available_clusterings'] = list(self.clusterings.keys())
        self.p_meta['default_clustering'] = self.external_clustering['default_clustering']

    if not self.state_autoload and 'default' in self.states_table.states:
        self.state_autoload = 'default'

    if not self.collection_autoload and 'default' in self.collections.collections_dict:
        self.collection_autoload = 'default'

    if not self.p_meta['clusterings']:
        if self.p_meta['merged']:
            raise ConfigError("This merged profile database does not seem to have any hierarchical clustering "
                              "of splits that is required by the interactive interface. It may have been generated "
                              "by anvi-merge with the `--skip-hierarchical-clustering` flag, or hierarchical "
                              "clustering step may have been skipped by anvi-merge because you had too many splits "
                              "to get the clustering in a reasonable amount of time. Please read the help menu for "
                              "anvi-merge, and/or refer to the tutorial: "
                              "http://merenlab.org/2015/05/01/anvio-tutorial/#clustering-during-merging")
        else:
            raise ConfigError("This single profile database does not seem to have any hierarchical clustering "
                              "that is required by the interactive interface. You must use `--cluster-contigs` "
                              "flag for single profiles to access to this functionality. Please read the help "
                              "menu for anvi-profile, and/or refer to the tutorial.")

    # self.split_names_ordered is going to be the 'master' names list. everything else is going to
    # need to match these names:
    self.split_names_ordered = utils.get_names_order_from_newick_tree(self.p_meta['clusterings'][self.p_meta['default_clustering']]['newick'])

    # now we know what splits we are interested in (self.split_names_ordered), we can get rid of all the
    # unnecessary splits stored in views dicts.
    self.prune_view_dicts()

    # if there are any HMM search results in the contigs database other than 'singlecopy' sources,
    # we would like to visualize them as additional layers. following function is inherited from
    # Contigs DB superclass and will fill self.hmm_searches_dict if appropriate data is found in
    # search tables:
    if self.mode == 'full':
        self.init_non_singlecopy_gene_hmm_sources(self.split_names_ordered,
                                                  return_each_gene_as_a_layer=self.split_hmm_layers)

    if self.additional_layers_path:
        filesnpaths.is_file_tab_delimited(self.additional_layers_path)
        self.additional_layers = self.additional_layers_path

    self.check_names_consistency()
    self.convert_view_data_into_json()
def __init__(self, args, external_clustering=None):
    """Set up the interactive interface state from command-line arguments.

    Older variant of the interactive constructor: reads attributes from
    `args`, initializes the contigs superclass, loads samples information,
    dispatches into one of the display modes, and establishes the master
    split name order from the default clustering.

    Raises ConfigError for incompatible argument combinations or when no
    hierarchical clustering is available for the interface.
    """
    self.args = args
    self.views = {}
    self.states_table = None
    self.p_meta = {}
    self.title = 'Unknown Project'

    # safe accessor: returns None for arguments that were never declared
    A = lambda x: args.__dict__[x] if x in args.__dict__ else None
    self.mode = A('mode')
    self.profile_db_path = A('profile_db')
    self.contigs_db_path = A('contigs_db')
    self.collection_name = A('collection_name')
    self.manual_mode = A('manual_mode')
    self.split_hmm_layers = A('split_hmm_layers')
    self.additional_layers_path = A('additional_layers')
    self.additional_view_path = A('additional_view')
    self.samples_information_db_path = A('samples_information_db')
    self.view = A('view')
    self.fasta_file = A('fasta_file')
    self.view_data_path = A('view_data')
    self.tree = A('tree')
    self.title = A('title')
    self.output_dir = A('output_dir')
    self.show_views = A('show_views')
    self.state = A('state')
    self.show_states = A('show_states')
    self.skip_check_names = A('skip_check_names')
    self.list_collections = A('list_collections')

    self.distance = A('distance') or constants.distance_metric_default
    self.linkage = A('linkage') or constants.linkage_method_default

    # make sure early on that both the distance and linkage is OK.
    clustering.is_distance_and_linkage_compatible(self.distance, self.linkage)

    self.split_names_ordered = None
    self.additional_layers = None
    self.auxiliary_profile_data_available = False

    self.samples_information_dict = {}
    self.samples_order_dict = {}
    self.samples_information_default_layer_order = {}

    # make sure the mode will be set properly.
    # NOTE: the `raise X, msg` statements this block used were Python 2-only
    # syntax; the call form works under both Python 2 and 3.
    if self.collection_name and self.manual_mode:
        raise ConfigError("You can't anvi-interactive in manual mode with a collection name.")

    self.external_clustering = external_clustering

    self.collections = ccollections.Collections()

    ContigsSuperclass.__init__(self, self.args)
    self.init_splits_taxonomy()

    if self.samples_information_db_path:
        samples_information_db = SamplesInformationDatabase(self.samples_information_db_path)
        self.samples_information_dict, self.samples_order_dict = samples_information_db.get_samples_information_and_order_dicts()
        self.samples_information_default_layer_order = samples_information_db.get_samples_information_default_layer_order()
        samples_information_db.disconnect()

    if self.contigs_db_path:
        self.completeness = Completeness(self.contigs_db_path)
        self.collections.populate_collections_dict(self.contigs_db_path, anvio.__contigs__version__)
    else:
        self.completeness = None

    if 'skip_init_functions' in args and not args.skip_init_functions:
        self.init_functions()

    # make sure we are not dealing with apples and oranges here.
    if self.contigs_db_path and self.profile_db_path:
        is_profile_db_and_contigs_db_compatible(self.profile_db_path, self.contigs_db_path)

    self.P = lambda x: os.path.join(self.p_meta['output_dir'], x)
    self.cwd = os.getcwd()

    # here is where the big deal stuff takes place:
    if not self.mode and self.manual_mode:
        self.mode = 'manual'
        self.run.info('Mode', self.mode, mc='red')
        self.load_manual_mode(args)
    elif self.mode == 'refine':
        self.load_full_mode(args)
    elif self.collection_name or self.list_collections:
        self.mode = 'collection'
        self.run.info('Mode', self.mode, mc='green')
        self.load_collection_mode(args)
    else:
        self.mode = 'full'
        self.load_full_mode(args)

    # make sure the samples information database, if there is one, is in fact compatible with the profile database
    # the reason we are doing this here is because when we are in 'self.manual_mode', the self.p_meta['samples'] is
    # being filled within the self.load_manual_mode function based on the headers of the view data.
    if self.profile_db_path and self.samples_information_db_path:
        is_profile_db_and_samples_db_compatible(self.profile_db_path, self.samples_information_db_path,
                                                manual_mode_exception=self.manual_mode)

    if self.external_clustering:
        self.p_meta['clusterings'] = self.clusterings = self.external_clustering['clusterings']
        # list() so this remains a real list under Python 3, where keys() is a view
        self.p_meta['available_clusterings'] = list(self.clusterings.keys())
        self.p_meta['default_clustering'] = self.external_clustering['default_clustering']

    if not self.state and 'default' in self.states_table.states:
        self.state = 'default'

    if not self.p_meta['clusterings']:
        if self.p_meta['merged']:
            raise ConfigError("This merged profile database does not seem to have any hierarchical clustering "
                              "of splits that is required by the interactive interface. It may have been generated "
                              "by anvi-merge with the `--skip-hierarchical-clustering` flag, or hierarchical "
                              "clustering step may have been skipped by anvi-merge because you had too many splits "
                              "to get the clustering in a reasonable amount of time. Please read the help menu for "
                              "anvi-merge, and/or refer to the tutorial: "
                              "http://merenlab.org/2015/05/01/anvio-tutorial/#clustering-during-merging")
        else:
            raise ConfigError("This single profile database does not seem to have any hierarchical clustering "
                              "that is required by the interactive interface. You must use `--cluster-contigs` "
                              "flag for single profiles to access to this functionality. Please read the help "
                              "menu for anvi-profile, and/or refer to the tutorial.")

    # self.split_names_ordered is going to be the 'master' names list. everything else is going to
    # need to match these names:
    self.split_names_ordered = utils.get_names_order_from_newick_tree(self.p_meta['clusterings'][self.p_meta['default_clustering']]['newick'])

    # now we know what splits we are interested in (self.split_names_ordered), we can get rid of all the
    # unnecessary splits stored in views dicts.
    self.prune_view_dicts()

    # if there are any HMM search results in the contigs database other than 'singlecopy' sources,
    # we would like to visualize them as additional layers. following function is inherited from
    # Contigs DB superclass and will fill self.hmm_searches_dict if appropriate data is found in
    # search tables:
    if self.mode == 'full':
        self.init_non_singlecopy_gene_hmm_sources(self.split_names_ordered,
                                                  return_each_gene_as_a_layer=self.split_hmm_layers)

    if self.additional_layers_path:
        filesnpaths.is_file_tab_delimited(self.additional_layers_path)
        self.additional_layers = self.additional_layers_path

    self.check_names_consistency()
    self.convert_view_data_into_json()
def handle(self, *args, **options):
    """Migrate a legacy anvi'server sqlite database into the new Django models.

    Three phases: (1) copy rows from the legacy `users` table into Django
    User/UserProfile objects, (2) move each user's project files into the new
    data directory, (3) re-create each legacy project (renaming its files,
    rebuilding the samples database, counting leaves/layers, and migrating
    old share links). Failures in phase 3 are reported per-project and the
    partially-migrated files are removed.
    """
    conn = sqlite3.connect(options['userdb_path'][0])

    # legacy schema, for reference:
    # CREATE TABLE users (
    #     0 login TEXT PRIMARY KEY, 1 firstname TEXT, 2 lastname TEXT, 3 email TEXT,
    #     4 password TEXT, 5 path TEXT, 6 token TEXT, 7 accepted INTEGER, 8 project TEXT,
    #     9 affiliation TEXT, 10 ip TEXT, 11 clearance TEXT, 12 date TEXT, 13 visit TEXT)

    print("Migrating users table...")
    user_paths = {}
    for row in conn.execute('SELECT * FROM users;'):
        username = sanitize_username(row[0])
        password = "******" + row[4]
        email = row[3]
        is_active = True
        is_superuser = True if row[11] == 'admin' else False
        user_paths[row[5]] = username
        date_joined = datetime.strptime(row[12], "%Y-%m-%d").replace(tzinfo=timezone.utc)

        newuser = User(username=username, password=password, email=email, is_active=is_active,
                       is_superuser=is_superuser, is_staff=is_superuser, date_joined=date_joined)
        newuser.save()

        fullname = "%s %s" % (row[1], row[2])
        institution = row[9]
        if len(fullname) < 2:
            fullname = None

        newuser_profile = UserProfile(user=newuser, fullname=fullname, orcid=None, institution=institution)
        newuser_profile.save()
    print(" - Successful.")

    print("Moving project files... ")
    for path in user_paths:
        username = user_paths[path]
        src = os.path.join(options['userfiles_path'][0], path)  # old user project dir
        dst = os.path.join(settings.USER_DATA_DIR, username)    # new user project dir
        try:
            shutil.copytree(src, dst)
        except FileNotFoundError:
            # if user path does not exist, create an empty dir for the user
            os.makedirs(dst)
    print(" - Successful")

    # (fixed typo in the original message: "Migratins")
    print("Migrating project table... ")
    # legacy schema:
    # CREATE TABLE projects (
    #     0 name TEXT PRIMARY KEY, 1 path TEXT, 2 user TEXT, 3 description TEXT)
    for row in conn.execute('SELECT * FROM projects;'):
        name = row[0]
        slug = slugify(name).replace('-', '_')
        path = row[1]
        username = sanitize_username(row[2])
        description = row[3]

        # rename project files from legacy names to the new naming scheme
        fileTypes_old = ['treeFile', 'dataFile', 'fastaFile', 'samplesOrderFile', 'samplesInformationFile']
        fileTypes_new = ['tree.txt', 'data.txt', 'fasta.fa', 'samples-order.txt', 'samples-info.txt']
        for i in range(5):
            try:
                os.rename(os.path.join(settings.USER_DATA_DIR, username, path, fileTypes_old[i]),
                          os.path.join(settings.USER_DATA_DIR, username, path, fileTypes_new[i]))
            except OSError:
                # the legacy file simply may not exist for this project
                pass

        try:
            if not description or not len(description) > 0:
                description = ""

            project = Project(name=name, slug=slug, user=User.objects.get(username=username), secret=path)

            # NOTE: the original code had these two assignments swapped (the
            # samples *information* file ended up in `samples_order` and vice
            # versa), so SamplesInformationDatabase.create received the files
            # in the wrong order.
            samples_order = project.get_file_path('samples-order.txt', default=None)
            samples_info = project.get_file_path('samples-info.txt', default=None)
            if (samples_info or samples_order) and not project.get_file_path('samples.db', default=None):
                s = dbops.SamplesInformationDatabase(project.get_file_path('samples.db', dont_check_exists=True), quiet=True)
                s.create(samples_order, samples_info)

            interactive = project.get_interactive()

            # try to get number of leaves
            try:
                leaves = get_names_order_from_newick_tree(project.get_file_path('tree.txt', default=None))
                project.num_leaves = len(leaves) if leaves != [''] else 0
            except Exception:
                project.num_leaves = 0

            # try to get number of layers
            try:
                project.num_layers = len(interactive.views['single'][0]) - 1  # <- -1 because first column is contigs
            except Exception:
                project.num_layers = 0

            # store description
            dbops.update_description_in_db(project.get_file_path('profile.db', default=None), description or '')

            project.synchronize_num_states()
            project.synchronize_num_collections()

            project.save()

            # try to migrate old links. use a parameterized query instead of
            # interpolating the project name into the SQL string.
            for row_links in conn.execute('SELECT * FROM views WHERE project LIKE ?;', (name,)):
                old_link = OldLinks(name=row_links[0],
                                    user=sanitize_username(row_links[1]),
                                    project=Project.objects.filter(name=row_links[2],
                                                                   user__username=sanitize_username(row_links[1]))[0],
                                    is_public=True if row_links[3] == 1 else False,
                                    token=row_links[4])
                old_link.save()

                project_link = ProjectLink(project=old_link.project, link=old_link.token)
                project_link.save()
        except Exception as e:
            print(username + " " + name + " " + path + " failed to create project, here is the exception " + str(e))
            shutil.rmtree(os.path.join(settings.USER_DATA_DIR, username, path))

    print(" - Successful")

    # the original never closed the legacy database connection
    conn.close()
def load_manual_mode(self, args):
    """Initialize the interactive display in 'manual' mode.

    Manual mode works without a contigs database: the display is driven by a
    user-provided newick tree and/or a TAB-delimited view data file. A profile
    database path is still required, but it may point to a non-existing file,
    in which case an empty one is created to store display states and
    collections. Fills `self.p_meta`, `self.views`, `self.splits_basic_info`
    and related attributes.

    Raises ConfigError when the combination of inputs does not make sense.
    """
    if self.contigs_db_path:
        raise ConfigError("When you want to use the interactive interface in manual mode, you must\
                           not use a contigs database.")

    if not self.profile_db_path:
        raise ConfigError("Even when you want to use the interactive interface in manual mode, you need\
                           to provide a profile database path. But you DO NOT need an already existing\
                           profile database, since anvi'o will generate an empty one for you. The profile\
                           database in this mode only used to read or store the 'state' of the display\
                           for visualization purposes, or to allow you to create and store collections.")

    # if the user is using an existing profile database, we need to make sure that it is not associated
    # with a contigs database, since it would mean that it is a full anvi'o profile database and should
    # not be included in manual operations.
    if filesnpaths.is_file_exists(self.profile_db_path, dont_raise=True):
        profile_db = ProfileDatabase(self.profile_db_path)

        if profile_db.meta['contigs_db_hash']:
            raise ConfigError("Well. It seems the profile database is associated with a contigs database,\
                               which means using it in manual mode is not the best way to use it. Probably\
                               what you wanted to do is to let the manual mode create a new profile database\
                               for you. Simply type in a new profile database path (it can be a file name\
                               that doesn't exist).")

    if not self.tree and not self.view_data_path:
        raise ConfigError("You must be joking Mr. Feynman. No tree file, and no data file? What is it that\
                           anvi'o supposed to visualize? :(")

    if not self.tree:
        self.run.warning("You haven't declared a tree file. Anvi'o will do its best to come up with an\
                          organization of your items.")

    if self.view:
        raise ConfigError("You can't use '--view' parameter when you are running the interactive interface\
                           in manual mode")

    if self.show_views:
        raise ConfigError("Sorry, there are no views to show in manual mode :/")

    if self.show_states:
        raise ConfigError("Sorry, there are no states to show in manual mode :/")

    # item names come from the tree leaves when a tree is given, otherwise from
    # the first column of the view data file (skipping the header row)
    if self.tree:
        filesnpaths.is_file_exists(self.tree)
        newick_tree_text = ''.join([l.strip() for l in open(os.path.abspath(self.tree)).readlines()])
        item_names = utils.get_names_order_from_newick_tree(newick_tree_text)
    else:
        item_names = utils.get_column_data_from_TAB_delim_file(self.view_data_path, column_indices=[0])[0][1:]

    # try to convert item names into integer values for proper sorting later. it's OK if it does
    # not work.
    try:
        item_names = [int(n) for n in item_names]
    except:
        pass

    view_data_path = os.path.abspath(self.view_data_path) if self.view_data_path else None
    self.p_meta['splits_fasta'] = os.path.abspath(self.fasta_file) if self.fasta_file else None
    self.p_meta['output_dir'] = None
    self.p_meta['views'] = {}
    self.p_meta['merged'] = True
    self.p_meta['default_view'] = 'single'
    self.default_view = self.p_meta['default_view']

    # set some default organizations of data:
    self.p_meta['clusterings'] = {'Alphabetical_(reverse):none:none': {'basic': sorted(item_names)},
                                  'Alphabetical:none:none': {'basic': sorted(item_names, reverse=True)}}
    self.p_meta['available_clusterings'] = ['Alphabetical_(reverse):none:none', 'Alphabetical:none:none']
    self.p_meta['default_clustering'] = self.p_meta['available_clusterings'][0]

    # if we have a tree, let's make arrangements for it:
    if self.tree:
        clustering_id = '%s:unknown:unknown' % filesnpaths.get_name_from_file_path(self.tree)
        self.p_meta['default_clustering'] = clustering_id
        self.p_meta['available_clusterings'].append(clustering_id)
        self.p_meta['clusterings'][clustering_id] = {'newick': newick_tree_text}

    if self.view_data_path:
        # sanity of the view data
        filesnpaths.is_file_tab_delimited(view_data_path)
        view_data_columns = utils.get_columns_of_TAB_delim_file(view_data_path, include_first_column=True)

        # load view data as the default view:
        self.views[self.default_view] = {'header': view_data_columns[1:],
                                         'dict': utils.get_TAB_delimited_file_as_dictionary(view_data_path)}
    else:
        # no view data is provided... it is only the tree we have. we will creaet a mock 'view data dict'
        # here using what is in the tree.
        ad_hoc_dict = {}
        for item in item_names:
            ad_hoc_dict[item] = {'names': item}

        self.views[self.default_view] = {'header': ['names'], 'dict': ad_hoc_dict}

    self.displayed_item_names_ordered = list(self.views[self.default_view]['dict'].keys())

    # we assume that the sample names are the header of the view data, so we might as well set it up:
    self.p_meta['samples'] = self.views[self.default_view]['header']

    # if we have an input FASTA file, we will set up the split_sequences and splits_basic_info dicts,
    # otherwise we will leave them empty
    self.splits_basic_info = {}
    self.split_sequences = None
    if self.p_meta['splits_fasta']:
        filesnpaths.is_file_fasta_formatted(self.p_meta['splits_fasta'])
        self.split_sequences = utils.get_FASTA_file_as_dictionary(self.p_meta['splits_fasta'])

        names_missing_in_FASTA = set(self.displayed_item_names_ordered) - set(self.split_sequences.keys())
        num_names_missing_in_FASTA = len(names_missing_in_FASTA)
        if num_names_missing_in_FASTA:
            raise ConfigError('Some of the names in your view data does not have corresponding entries in the\
                               FASTA file you provided. Here is an example to one of those %d names that occur\
                               in your data file, but not in the FASTA file: "%s"' % (num_names_missing_in_FASTA, names_missing_in_FASTA.pop()))

        # setup a mock splits_basic_info dict
        for split_id in self.displayed_item_names_ordered:
            self.splits_basic_info[split_id] = {'length': len(self.split_sequences[split_id]),
                                                'gc_content': utils.get_GC_content_for_sequence(self.split_sequences[split_id])}

    # create a new, empty profile database for manual operations
    if not os.path.exists(self.profile_db_path):
        profile_db = ProfileDatabase(self.profile_db_path)
        profile_db.create({'db_type': 'profile',
                           'merged': True,
                           'contigs_db_hash': None,
                           'samples': ','.join(self.p_meta['samples'])})

    # create an instance of states table
    self.states_table = TablesForStates(self.profile_db_path)

    # also populate collections, if there are any
    self.collections.populate_collections_dict(self.profile_db_path)

    # read description from self table, if it is not available get_description function will return placeholder text
    self.p_meta['description'] = get_description_in_db(self.profile_db_path)

    if self.title:
        self.title = self.title