def install_db(database, db_filepath=None, schema_version=None):
    """Install database. If database already exists, it is first removed.

    :param database: Name of the database to drop and re-create.
    :param db_filepath:
        File handed to ``mysqldb.install_db``. When None, the statements in
        ``db_schema_0.STATEMENTS`` are executed instead and the ``convert``
        pipeline is then run on the new database.
    :param schema_version:
        Value forwarded (as a string) to the convert pipeline's ``-s``
        option. Only used when ``db_filepath`` is None.
    :returns: None. Failures are reported by printing a message.
    """
    # No database is named for the first engine: it is only used to
    # drop/create the target database.
    engine1, _ = mysqldb.get_engine(database="", echo=False)
    if engine1 is None:
        print("Invalid MySQL credentials.")
        return

    try:
        if mysqldb.drop_create_db(engine1, database) != 0:
            print("Unable to create new, empty database.")
            return

        # Reuse the credentials of the first engine for the new database.
        engine2, _ = mysqldb.get_engine(database=database,
                                        username=engine1.url.username,
                                        password=engine1.url.password,
                                        echo=False)
        if engine2 is None:
            print(f"No connection to the {database} database due "
                  "to invalid credentials or database.")
            return

        try:
            if db_filepath is not None:
                mysqldb.install_db(engine2, db_filepath)
            else:
                mysqldb.execute_transaction(engine2, db_schema_0.STATEMENTS)
                convert_args = ["pdm_utils.run", "convert", database,
                                "-s", str(schema_version)]
                convert_db.main(convert_args, engine2)
        finally:
            # Close up all connections in the connection pool, even if
            # installation or conversion raised.
            engine2.dispose()
    finally:
        # Close up all connections in the connection pool.
        engine1.dispose()
def reinsert_pham_data(new_phams, new_colors, engine):
    """
    Puts pham data back into the database.

    Two transactions are issued through ``mysqldb.execute_transaction``:
    first one INSERT per pham into the pham table, then one UPDATE per gene
    assigning its PhamID in the gene table.

    :param new_phams: mapping of PhamID to an iterable of GeneIDs
    :param new_colors: mapping of PhamID to the color string to store
    :param engine: engine passed through to mysqldb.execute_transaction
    :return: None
    """
    # Colors have to go first, since PhamID column in gene table references
    # PhamID in pham table
    pham_inserts = [
        f"INSERT INTO pham (PhamID, Color) VALUES ({pham_id}, '{color}')"
        for pham_id, color in new_colors.items()
    ]
    mysqldb.execute_transaction(engine, pham_inserts)

    gene_updates = [
        f"UPDATE gene SET PhamID = {pham_id} WHERE GeneID = '{gene_id}'"
        for pham_id, gene_ids in new_phams.items()
        for gene_id in gene_ids
    ]
    mysqldb.execute_transaction(engine, gene_updates)
def install_db(database, db_filepath=None, schema_version=None):
    """Install database. If database already exists, it is first removed.

    :param database: Name of the database to drop and re-create.
    :param db_filepath:
        File handed to ``mysqldb_basic.install_db``. When None, the
        statements in ``db_schema_0.STATEMENTS`` are executed instead and
        the ``convert`` pipeline is then run on the new database.
    :param schema_version:
        Value forwarded (as a string) to the convert pipeline's ``-s``
        option. Only used when ``db_filepath`` is None.
    :returns: None. Failures are reported by printing a message.
    """
    # No need to specify database yet, since it needs to first check if the
    # database exists.
    alchemist1 = AlchemyHandler(database="")
    alchemist1.connect(pipeline=True)
    engine1 = alchemist1.engine

    try:
        if mysqldb_basic.drop_create_db(engine1, database) != 0:
            print("Unable to create new, empty database.")
            return

        # Reuse the credentials of the first engine for the new database.
        alchemist2 = AlchemyHandler(database=database,
                                    username=engine1.url.username,
                                    password=engine1.url.password)
        alchemist2.connect(pipeline=True)
        engine2 = alchemist2.engine
        if engine2 is None:
            print(f"No connection to the {database} database due "
                  "to invalid credentials or database.")
            return

        try:
            if db_filepath is not None:
                mysqldb_basic.install_db(engine2, db_filepath)
            else:
                mysqldb.execute_transaction(engine2, db_schema_0.STATEMENTS)
                convert_args = [
                    "pdm_utils.run", "convert", database,
                    "-s", str(schema_version),
                ]
                convert_db.main(convert_args, engine2)
        finally:
            # Close up all connections in the connection pool, even if
            # installation or conversion raised.
            engine2.dispose()
    finally:
        # Close up all connections in the connection pool.
        engine1.dispose()
def fix_colored_orphams(engine):
    """
    Find any single-member phams which are colored as though they are
    multi-member phams (not #FFFFFF in pham.Color), and reset their color
    to #FFFFFF.

    All UPDATE statements are submitted together through
    ``mysqldb.execute_transaction``.

    :param engine: sqlalchemy Engine allowing access to the database
    :return: None
    """
    query = ("SELECT * FROM (SELECT g.PhamID, COUNT(GeneID) AS count, "
             "p.Color FROM gene AS g INNER JOIN pham AS p ON g.PhamID "
             "=p.PhamID GROUP BY PhamID) AS c WHERE Color != '#FFFFFF' "
             "AND count = 1")

    results = mysqldb_basic.query_dict_list(engine, query)
    print(f"Found {len(results)} non-white orphams...")

    # Every orpham gets the same color; only the PhamID of each row is needed.
    new_color = "#FFFFFF"
    commands = [
        f"UPDATE pham SET Color = '{new_color}' WHERE "
        f"PhamID = '{row['PhamID']}'"
        for row in results
    ]

    mysqldb.execute_transaction(engine, commands)
def fix_white_phams(engine):
    """
    Find any phams with 2+ members which are colored as though they are
    orphams (#FFFFFF in pham.Color), and give each a new random color.

    The new color is drawn in HSV space with hue > 0, saturation >= 0.5 and
    value >= 0.8, then stored as an upper-case '#RRGGBB' string.

    :param engine: sqlalchemy Engine allowing access to the database
    :return: None
    """
    query = ("SELECT c.PhamID FROM (SELECT g.PhamID, COUNT(GeneID) AS count, "
             "p.Color FROM gene AS g INNER JOIN pham AS p ON g.PhamID "
             "= p.PhamID GROUP BY PhamID) AS c WHERE Color = '#FFFFFF' "
             "AND count > 1")

    white_phams = mysqldb_basic.query_dict_list(engine, query)
    print(f"Found {len(white_phams)} white phams...")

    updates = []
    for row in white_phams:
        pham_id = row["PhamID"]

        # Redraw each component until it falls inside the accepted range.
        hue = saturation = brightness = 0
        while hue <= 0:
            hue = random.random()
        while saturation < 0.5:
            saturation = random.random()
        while brightness < 0.8:
            brightness = random.random()

        red, green, blue = (
            int(channel * 255)
            for channel in colorsys.hsv_to_rgb(hue, saturation, brightness)
        )
        new_color = f"#{red:02x}{green:02x}{blue:02x}".upper()

        updates.append(f"UPDATE pham SET Color = '{new_color}' WHERE "
                       f"PhamID = '{pham_id}'")

    mysqldb.execute_transaction(engine, updates)
def fix_miscolored_phams(engine):
    """
    Repair pham colors that disagree with pham size.

    Two passes are made, each submitting its UPDATE statements through
    ``mysqldb.execute_transaction``:

    1. Phams with more than one gene that are colored '#FFFFFF' get a new
       random color (HSV with hue > 0, saturation >= 0.5, value >= 0.8,
       stored as a lower-case '#rrggbb' string).
    2. Phams with exactly one gene that are not colored '#FFFFFF' are reset
       to '#FFFFFF'.

    :param engine: sqlalchemy Engine allowing access to the database
    :return: None
    """
    print("Phixing Phalsely Hued Phams...")
    # Phams which are colored as though they are orphams, when really
    # they are multi-member phams
    query = ("SELECT * FROM (SELECT g.PhamID, COUNT(GeneID) AS count, "
             "p.Color FROM gene AS g INNER JOIN pham AS p ON g.PhamID "
             "= p.PhamID GROUP BY PhamID) AS c WHERE Color = '#FFFFFF' "
             "AND count > 1")

    results = mysqldb.query_dict_list(engine, query)
    print(f"Found {len(results)} miscolored phams to fix")

    commands = []
    for dictionary in results:
        pham_id = dictionary["PhamID"]

        # Redraw each component until it falls inside the accepted range.
        h = s = v = 0
        while h <= 0:
            h = random.random()
        while s < 0.5:
            s = random.random()
        while v < 0.8:
            v = random.random()

        red, green, blue = (int(channel * 255)
                            for channel in colorsys.hsv_to_rgb(h, s, v))
        new_color = "#{:02x}{:02x}{:02x}".format(red, green, blue)

        commands.append(
            f"UPDATE pham SET Color = '{new_color}' WHERE PhamID = '{pham_id}'"
        )

    mysqldb.execute_transaction(engine, commands)

    print("Phixing Phalsely Phlagged Orphams...")
    # Phams which are colored as though they are multi-member phams
    # when really they are orphams
    query = ("SELECT * FROM (SELECT g.PhamID, COUNT(GeneID) AS count, "
             "p.Color FROM gene AS g INNER JOIN pham AS p ON g.PhamID "
             "=p.PhamID GROUP BY PhamID) AS c WHERE Color != '#FFFFFF' "
             "AND count = 1")

    results = mysqldb.query_dict_list(engine, query)
    print(f"Found {len(results)} miscolored orphams to fix...")

    new_color = "#FFFFFF"
    commands = [
        f"UPDATE pham SET Color = '{new_color}' WHERE "
        f"PhamID = '{dictionary['PhamID']}'"
        for dictionary in results
    ]

    mysqldb.execute_transaction(engine, commands)
def test_execute_transaction_2(self):
    """Valid connection but invalid transaction should return code 1."""

    def insert_phage(phage_id):
        # Both statements are identical apart from the PhageID value.
        return ("INSERT INTO phage "
                "(PhageID, Accession, Name, HostGenus, Sequence, "
                "Length, GC, Status, DateLastModified, "
                "RetrieveRecord, AnnotationAuthor,"
                "Cluster, Subcluster) "
                f"VALUES ('{phage_id}', 'ABC123', 'L5_Draft', 'Mycobacterium', "
                "'ATCG', 4, 0.5001, 'final', "
                f"'{constants.EMPTY_DATE}', 1, 1, 'A', 'A2');")

    statements = [insert_phage("D29"), insert_phage("Trixie")]
    return_code, _ = mysqldb.execute_transaction(self.engine, statements)

    rows = self.engine.execute("SELECT COUNT(PhageID) FROM phage").fetchall()
    phage_count = rows[0][0]

    with self.subTest():
        self.assertEqual(phage_count, 1)
    with self.subTest():
        self.assertEqual(return_code, 1)
def convert_schema(engine, actual, dir, steps, verbose=False):
    """Iterate through conversion steps and convert database schema.

    Steps are applied in order and processing stops at the first step whose
    transaction reports failure.

    :param engine: Engine passed through to ``mysqldb.execute_transaction``.
    :param actual: Schema version to report if no step succeeds.
    :param dir:
        Conversion direction; passed to ``get_step_name`` and used in the
        progress/error messages.
    :param steps: Ordered iterable of schema versions to convert through.
    :param verbose: If true, print a progress line before each step.
    :returns:
        Tuple ``(stop_step, summary)``: the last step that was applied
        (or ``actual`` if none was), and a dictionary mapping each applied
        step name to that step's ``"step_summary_dict"``.
    """
    summary = {}  # Key = conversion step. Value = summary dictionary.
    stop_step = actual

    for step in steps:
        if verbose:
            print(f"{dir[:-1].capitalize()}ing to schema version {step}...")

        step_name = get_step_name(dir, step)
        step_dict = get_step_data(step_name)
        commands = [stmt for stmt in step_dict["statements"]
                    if stmt not in ("", "\n")]

        # Try to convert the schema.
        result = mysqldb.execute_transaction(engine, commands)
        # NOTE(review): some callers unpack execute_transaction() as
        # (return_code, msg); comparing such a tuple to 1 would never detect
        # a failure. Accept either shape - confirm the actual return type.
        return_code = result[0] if isinstance(result, tuple) else result

        if return_code == 1:
            print("Error encountered while executing MySQL statements.")
            print(f"Error: Unable to {dir} schema to version {step}.")
            break

        stop_step = step
        summary[step_name] = step_dict["step_summary_dict"]

    return stop_step, summary
def update_gene_table(phams, engine):
    """
    Updates the gene table with new pham data.

    One UPDATE statement is built per gene and all of them are submitted
    together through ``mysqldb.execute_transaction``.

    :param phams: new pham gene data (PhamID mapped to its GeneIDs)
    :type phams: dict
    :param engine: sqlalchemy Engine allowing access to the database
    :return: None
    """
    # We need to issue an update command for each gene in each pham
    updates = [
        f"UPDATE gene SET PhamID = {pham_id} WHERE GeneID = '{gene_id}'"
        for pham_id, gene_ids in phams.items()
        for gene_id in gene_ids
    ]

    mysqldb.execute_transaction(engine, updates)
def update_pham_table(colors, engine):
    """
    Populates the pham table with the new PhamIDs and their colors.

    The table is emptied and refilled within a single call to
    ``mysqldb.execute_transaction``.

    :param colors: new pham color data (PhamID mapped to its color)
    :type colors: dict
    :param engine: sqlalchemy Engine allowing access to the database
    :return: None
    """
    # Then we need to issue insert commands for each pham
    inserts = [
        f"INSERT INTO pham (PhamID, Color) VALUES ({pham_id}, '{color}')"
        for pham_id, color in colors.items()
    ]

    # First command needs to clear the pham table
    mysqldb.execute_transaction(engine, ["DELETE FROM pham", *inserts])
def test_execute_transaction_3(self):
    """Everything ok but no transaction should return 0."""
    # No statement list is supplied; only the returned status code is checked.
    status, _ = mysqldb.execute_transaction(self.engine)
    self.assertEqual(status, 0)
def _refresh_temp_dir(temp_dir):
    """Delete temp_dir if it exists, then create it anew.

    :param temp_dir: path of the working directory to reset
    :returns: True if the directory was (re)created, False if deleting or
        creating it failed (a message is printed in that case).
    """
    if os.path.exists(temp_dir):
        try:
            shutil.rmtree(temp_dir)
        except OSError:
            print(f"Failed to delete existing temp directory '{temp_dir}'")
            return False

    try:
        os.makedirs(temp_dir)
    except OSError:
        print(f"Failed to create new temp directory '{temp_dir}'")
        return False

    return True


def main(argument_list):
    """Run the phameration pipeline against the database named at the CLI.

    Existing pham data is read, gene translations are clustered with the
    selected program, old pham names/colors are preserved where possible,
    and the pham table and gene.PhamID values are rewritten.

    :param argument_list: command line arguments handed to the parser
    :returns: None. The function returns early (after printing a message)
        if the temp directory cannot be refreshed or if no new pham/color
        data was produced; in the latter case the database is left untouched.
    """
    # Set up the argument parser and parse arguments
    phamerate_parser = setup_argparser()
    args = phamerate_parser.parse_args(argument_list)
    program = args.program
    temp_dir = args.temp_dir

    # Initialize SQLAlchemy engine with database provided at CLI.
    # NOTE(review): the original comment assumed connect_to_db() only
    # returns once database access has been verified - confirm.
    # We'll assume the user also has UPDATE, INSERT, and TRUNCATE privileges.
    engine = mysqldb.connect_to_db(args.db)

    # Record start time
    start = datetime.datetime.now()

    try:
        if not _refresh_temp_dir(temp_dir):
            return

        # Get old pham data and un-phamerated genes
        old_phams = get_pham_geneids(engine)
        old_colors = get_pham_colors(engine)
        unphamerated = get_new_geneids(engine)

        # Get GeneIDs & translations, and translation groups
        genes_and_trans = map_geneids_to_translations(engine)
        translation_groups = map_translations_to_geneids(engine)

        # Write input fasta file
        write_fasta(translation_groups, temp_dir)

        # Create clusterdb and perform clustering
        program_params = get_program_params(program, args)
        create_clusterdb(program, temp_dir)
        phamerate(program_params, program, temp_dir)

        # Parse phameration output
        new_phams = parse_output(program, temp_dir)
        new_phams = reintroduce_duplicates(new_phams, translation_groups,
                                           genes_and_trans)

        # Preserve old pham names and colors
        new_phams, new_colors = preserve_phams(old_phams, new_phams,
                                               old_colors, unphamerated)

        # Early exit if we don't have new phams or new colors - avoids
        # overwriting the existing pham data with potentially incomplete
        # new data
        if not new_phams or not new_colors:
            print("Failed to parse new pham/color data... "
                  "Terminating pipeline")
            return

        # If we got past the early exit, we are probably safe to truncate
        # the pham table, and insert the new pham data.
        # Clear old pham data - auto commits at end of transaction - this
        # will also set all PhamID values in gene table to NULL
        mysqldb.execute_transaction(engine, ["DELETE FROM pham"])

        # Insert new pham/color data
        reinsert_pham_data(new_phams, new_colors, engine)

        # Fix miscolored phams/orphams
        fix_miscolored_phams(engine)
    finally:
        # Close all connections in the connection pool, including on the
        # early-return and error paths.
        engine.dispose()

    # Record stop time and report phameration elapsed time
    stop = datetime.datetime.now()
    print("Elapsed time: {}".format(str(stop - start)))
def install_db(alchemist, database, db_filepath=None, config_file=None,
               schema_version=None, verbose=False, pipeline=False):
    """
    Install database. If database already exists, it is first removed.

    :param alchemist:
        Connection handler; its ``engine``, ``database``,
        ``validate_database()`` and ``build_engine()`` members are used.
    :param database: Name of the database to be installed
    :type database: str
    :param db_filepath:
        File handed to ``mysqldb_basic.install_db``. When None, the
        statements in ``db_schema_0.STATEMENTS`` are executed instead.
    :type db_filepath: Path
    :param config_file:
        Optional configuration file forwarded to the convert pipeline
        with ``-c``.
    :param schema_version:
        Desired schema version. When given and different from the installed
        database's version, the convert pipeline is run with ``-s``.
    :param verbose: If true, print progress and pass ``-v`` to convert.
    :type verbose: bool
    :param pipeline:
        If true, failures print a message and call ``sys.exit(1)`` instead
        of raising.
    :type pipeline: bool
    :raises OSError: the empty database could not be created
        (only when ``pipeline`` is false).
    :raises MySQLDatabaseError: the new database failed validation
        (only when ``pipeline`` is false).
    """
    # No need to specify database yet, since it needs to first check if the
    # database exists.
    engine = alchemist.engine
    try:
        result = mysqldb_basic.drop_create_db(engine, database)
    finally:
        engine.dispose()

    if result != 0:
        message = ("Unable to create new, empty database.\nPlease "
                   "check SQL service status and/or database availability.")
        if pipeline:
            print(message)
            sys.exit(1)

        raise OSError(message)

    alchemist.database = database
    try:
        alchemist.validate_database()
    except MySQLDatabaseError as err:
        message = (f"No connection to database {database} due "
                   "to invalid credentials and/or database.")
        if pipeline:
            print(message)
            sys.exit(1)

        # Keep the original failure attached as the cause.
        raise MySQLDatabaseError(message) from err

    alchemist.build_engine()
    engine = alchemist.engine

    try:
        if db_filepath is not None:
            mysqldb_basic.install_db(engine, db_filepath)
        else:
            mysqldb.execute_transaction(engine, db_schema_0.STATEMENTS)

        if schema_version is not None:
            curr_schema_version = mysqldb.get_schema_version(engine)

            if curr_schema_version != schema_version:
                if verbose:
                    print(f"Schema version {curr_schema_version} "
                          "database detected.\nBeginning database "
                          "conversion to "
                          f"schema version {schema_version}...")

                convert_args = [
                    "pdm_utils.run", "convert", database,
                    "-s", str(schema_version),
                ]
                if verbose:
                    convert_args.append("-v")

                if config_file is not None:
                    # Argument lists are expected to hold strings, so a
                    # path object is converted before being forwarded.
                    convert_args.extend(["-c", str(config_file)])

                convert_db.main(convert_args)
    finally:
        # Release pooled connections even if installation or conversion
        # raised.
        engine.dispose()