def apply_filters(alchemist, table, filters, values=None, verbose=False):
    """Applies MySQL WHERE clause filters using a Filter.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param table: MySQL table name.
    :type table: str
    :param filters: A list of lists with filter values, grouped by ORs.
    :type filters: list[list[str]]
    :param values: Optional values to seed the Filter with.
    :type values: list[str]
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    :returns: filter-Loaded Filter object.
    :rtype: Filter
    """
    # The constructor already receives the key; the original re-assigned
    # db_filter.key = table redundantly immediately afterwards.
    db_filter = Filter(alchemist=alchemist, key=table)
    db_filter.values = values

    try:
        db_filter.add(filters)
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt are
        # not swallowed; a malformed conditional string still aborts.
        print("Please check your syntax for the conditional string: "
              f"{filters}")
        exit(1)

    return db_filter
def get_cds_seqrecords(alchemist, values, data_cache=None, nucleotide=False,
                       verbose=False):
    """Builds SeqRecord objects for the CDS features selected by `values`.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param values: Values identifying the CDS features to convert.
    :type values: list[str]
    :param data_cache: Mapping of genome_id to already-retrieved genomes.
    :type data_cache: dict
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    :returns: SeqRecord objects converted from the retrieved SQL data.
    :rtype: list
    """
    if data_cache is None:
        data_cache = {}

    features = parse_feature_data(alchemist, values=values)

    # Filter keyed on GeneID to pull conserved-domain data per feature.
    domain_filter = Filter(alchemist)
    domain_filter.key = 'gene.GeneID'

    if verbose:
        print("...Converting SQL data...")

    records = []
    for feature in features:
        # Reuse the cached parent genome when available; fetch otherwise.
        genome = data_cache.get(feature.genome_id)
        if genome is None:
            genome = get_single_genome(alchemist, feature.genome_id,
                                       data_cache=data_cache)

        feature.genome_length = genome.length
        feature.set_seqfeature()

        domain_filter.values = [feature.id]
        domains = domain_filter.select(CDD_DATA_COLUMNS)

        records.append(flat_files.cds_to_seqrecord(feature, genome,
                                                   gene_domains=domains))

    return records
def main(unparsed_args_list):
    """Run main get_gb_records pipeline."""
    # Parse command line arguments
    args = parse_args(unparsed_args_list)

    # Filters input: phage.Status=draft AND phage.HostGenus=Mycobacterium
    # Args structure: [['phage.Status=draft'], ['phage.HostGenus=Mycobacterium']]
    filter_args = args.filters

    credentials = ncbi.get_ncbi_creds(args.ncbi_credentials_file)
    output_folder = basic.set_path(args.output_folder, kind="dir", expect=True)
    working_dir = pathlib.Path(RESULTS_FOLDER)
    working_path = basic.make_new_dir(output_folder, working_dir, attempt=50)

    if working_path is None:
        print(f"Invalid working directory '{working_dir}'")
        sys.exit(1)

    # Verify database connection and schema compatibility.
    print("Connecting to the MySQL database...")
    alchemist = AlchemyHandler(database=args.database)
    alchemist.connect(pipeline=True)
    engine = alchemist.engine
    mysqldb.check_schema_compatibility(engine, "the get_gb_records pipeline")

    # Walk the target table's primary-key ColumnCollection; the last (and
    # in practice only) column becomes the filter key ('phage.PhageID').
    alchemist.build_metadata()
    table = querying.get_table(alchemist.metadata, TARGET_TABLE)
    for column in table.primary_key.columns:
        primary_key = column

    # Build a filter from the command-line conditionals; add_filters is
    # expected to exit on malformed input.
    db_filter = Filter(alchemist=alchemist, key=primary_key)
    db_filter.values = []
    add_filters(db_filter, filter_args)

    # Run the query: db_filter.values becomes the PhageIDs that pass.
    db_filter.update()

    retained_ids = set(db_filter.values)

    # Create data sets
    print("Retrieving accessions from the database...")
    query = construct_accession_query(retained_ids)
    list_of_dicts = mysqldb_basic.query_dict_list(engine, query)
    id_acc_dict = get_id_acc_dict(list_of_dicts)
    acc_id_dict = get_acc_id_dict(id_acc_dict)
    engine.dispose()

    if acc_id_dict:
        get_data(working_path, acc_id_dict, credentials)
    else:
        print("There are no records to retrieve.")
def get_acc_id_dict(alchemist):
    """Test helper function to retrieve accessions of database entries.
    """
    phage_filter = Filter(alchemist=alchemist)
    phage_filter.key = "phage.PhageID"
    phage_filter.values = phage_filter.build_values()
    return phage_filter.group("phage.Accession")
def execute_resubmit(alchemist, revisions_data_dicts, folder_path,
                     folder_name, filters="", groups=None, verbose=False):
    """Executes the entirety of the genbank resubmit pipeline.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param revisions_data_dicts: Data dictionaries containing pham/notes data.
    :type revisions_data_dicts: list[dict]
    :param folder_path: Path to a valid dir for new dir creation.
    :type folder_path: Path
    :param folder_name: A name for the export folder.
    :type folder_name: str
    :param filters: A conditional string to filter results with.
    :type filters: str
    :param groups: A list of supported MySQL column names to group by.
    :type groups: list[str]
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    """
    # Fix: mutable default argument (groups=[]) replaced with None sentinel.
    if groups is None:
        groups = []

    db_filter = Filter(alchemist=alchemist)
    db_filter.key = "gene.PhamID"
    db_filter.add(BASE_CONDITIONALS)

    if filters != "":
        try:
            db_filter.add(filters)
        except Exception:
            # NOTE(review): sibling pipelines exit(1) on a bad conditional
            # string; this one only warns and continues — confirm intent.
            print("Please check your syntax for the conditional string:\n"
                  f"{filters}")

    resubmit_columns = db_filter.get_columns(RESUBMIT_COLUMNS)

    phams = []
    for data_dict in revisions_data_dicts:
        phams.append(data_dict["Pham"])

    db_filter.values = phams

    if verbose:
        print("Creating export folder...")
    export_path = folder_path.joinpath(folder_name)
    export_path = basic.make_new_dir(folder_path, export_path, attempt=50)

    conditionals_map = {}
    export_db.build_groups_map(db_filter, export_path, conditionals_map,
                               groups=groups, verbose=verbose)

    if verbose:
        print("Prepared query and path structure, beginning review export...")

    for mapped_path in conditionals_map.keys():
        if verbose:
            print("Retreiving phage data for pham revisions...")
        export_dicts = []
        for data_dict in revisions_data_dicts:
            if verbose:
                print(f"...Retrieving data for pham {data_dict['Pham']}...")

            conditionals = conditionals_map[mapped_path]

            final_call = data_dict["Final Call"]
            if final_call == "Hypothetical Protein":
                final_call = ""
            # NOTE(review): this appends one gene.Notes!= clause per
            # data_dict to the shared conditionals list, so clauses
            # accumulate across phams for the same path — confirm intended.
            conditionals.append(
                    querying.build_where_clause(alchemist.graph,
                                                f"gene.Notes!={final_call}"))

            query = querying.build_select(alchemist.graph,
                                          resubmit_columns,
                                          where=conditionals)

            results = querying.execute(alchemist.engine, query,
                                       in_column=db_filter.key,
                                       values=[data_dict["Pham"]])

            for result in results:
                format_resubmit_data(result, data_dict["Final Call"])
                export_dicts.append(result)

        if not export_dicts:
            if verbose:
                # Bug fix: the f-prefix was missing, so the literal text
                # "{mapped_path.name}" was printed instead of the dir name.
                print(f"'{mapped_path.name}' data selected for resubmision "
                      "matches selected call; no resubmision exported...")

            mapped_path.rmdir()
            continue

        export_dicts = sorted(export_dicts,
                              key=lambda export_dict: export_dict["Phage"])

        if verbose:
            print(f"Writing {CSV_NAME} in {mapped_path.name}...")
        file_path = mapped_path.joinpath(CSV_NAME)
        basic.export_data_dict(export_dicts, file_path, RESUBMIT_HEADER,
                               include_headers=True)
def main(unparsed_args_list):
    """Run main freeze database pipeline."""
    args = parse_args(unparsed_args_list)
    ref_database = args.database
    reset = args.reset
    new_database = args.new_database_name
    prefix = args.prefix

    # Filters input: phage.Status=draft AND phage.HostGenus=Mycobacterium
    # Args structure: [['phage.Status=draft'], ['phage.HostGenus=Mycobacterium']]
    filter_args = args.filters

    # Create config object with data obtained from file and/or defaults.
    config = configfile.build_complete_config(args.config_file)
    mysql_creds = config["mysql"]

    # Verify database connection and schema compatibility.
    print("Connecting to the MySQL database...")
    alchemist1 = AlchemyHandler(database=ref_database,
                                username=mysql_creds["user"],
                                password=mysql_creds["password"])
    alchemist1.connect(pipeline=True)
    engine1 = alchemist1.engine
    mysqldb.check_schema_compatibility(engine1, "the freeze pipeline")

    # Walk the target table's primary-key ColumnCollection; the last (and
    # in practice only) column becomes the filter key ('phage.PhageID').
    alchemist1.build_metadata()
    table = querying.get_table(alchemist1.metadata, TARGET_TABLE)
    for column in table.primary_key.columns:
        primary_key = column

    # Build a filter from the command-line conditionals; add_filters is
    # expected to exit on malformed input.
    db_filter = Filter(alchemist=alchemist1, key=primary_key)
    db_filter.values = []
    add_filters(db_filter, filter_args)

    # Run the query: db_filter.values becomes the PhageIDs that pass.
    db_filter.update()

    # Count the genomes that will be retained and build the MySQL
    # DELETE statement removing everything else.
    keep_set = set(db_filter.values)
    delete_stmt = construct_delete_stmt(TARGET_TABLE, primary_key, keep_set)
    count_query = construct_count_query(TARGET_TABLE, primary_key, keep_set)
    phage_count = mysqldb_basic.scalar(alchemist1.engine, count_query)

    # Determine the name of the new database.
    if new_database is None:
        if prefix is None:
            prefix = get_prefix()
        new_database = f"{prefix}_{phage_count}"

    # Create the new database, but prevent overwriting of current database.
    if engine1.url.database == new_database:
        print(
            "Error: names of the reference and frozen databases are the same.")
        print("No database will be created.")
        result = 1
    else:
        result = mysqldb_basic.drop_create_db(engine1, new_database)

    # Copy database.
    if result == 0:
        print(f"Reference database: {ref_database}")
        print(f"New database: {new_database}")
        result = mysqldb_basic.copy_db(engine1, new_database)

        if result == 0:
            print("Deleting genomes...")
            alchemist2 = AlchemyHandler(database=new_database,
                                        username=engine1.url.username,
                                        password=engine1.url.password)
            alchemist2.connect(pipeline=True)
            engine2 = alchemist2.engine

            engine2.execute(delete_stmt)
            if reset:
                engine2.execute(RESET_VERSION)

            # Close up all connections in the connection pool.
            engine2.dispose()
        else:
            print("Unable to copy the database.")

        # Close up all connections in the connection pool.
        engine1.dispose()
    else:
        print(f"Error creating new database: {new_database}.")

    print("Freeze database script completed.")
def execute_review(alchemist, folder_path, folder_name,
                   review=True, values=None, filters="", groups=None,
                   sort=None, g_reports=False, s_report=False, verbose=False):
    """Executes the entirety of the pham review pipeline.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param folder_path: Path to a valid dir for new dir creation.
    :type folder_path: Path
    :param folder_name: A name for the export folder.
    :type folder_name: str
    :param review: A boolean to toggle filtering of phams by pham
        discrepancies.
    :type review: bool
    :param values: List of values to filter database results.
    :type values: list[str]
    :param filters: A list of lists with filter values, grouped by ORs.
    :type filters: list[list[str]]
    :param groups: A list of supported MySQL column names to group by.
    :type groups: list[str]
    :param sort: A list of supported MySQL column names to sort by.
    :type sort: list[str]
    :param g_reports: A boolean to toggle export of additional pham
        information.
    :type g_reports: bool
    :param s_report: A boolean to toggle export of a summary report.
    :type s_report: bool
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    """
    # Fix: mutable default arguments ([]) replaced with None sentinels.
    if values is None:
        values = []
    if groups is None:
        groups = []
    if sort is None:
        sort = []

    db_filter = Filter(alchemist=alchemist)
    db_filter.key = "gene.PhamID"

    if values:
        db_filter.values = values

        if verbose:
            print(f"Identified {len(values)} phams to review...")

    if filters != "":
        try:
            db_filter.add(filters)
        except Exception:
            # Narrowed from a bare except; a malformed conditional
            # string still aborts the pipeline.
            print("Please check your syntax for the conditional string:\n"
                  f"{filters}")
            sys.exit(1)
        finally:
            # Runs even on the sys.exit path above (SystemExit propagates
            # afterwards); preserves the original control flow.
            db_filter.update()

        # Reset the filter's internal state so only BASE_CONDITIONALS
        # apply below, while keeping the values the user filters produced.
        # NOTE(review): these are private Filter attributes — confirm no
        # public reset API exists.
        db_filter._filters = []
        db_filter._updated = False
        db_filter._or_index = -1

    db_filter.add(BASE_CONDITIONALS)
    db_filter.update()

    if not db_filter.values:
        print("Current settings produced no database hits.")
        sys.exit(1)

    if review:
        review_phams(db_filter, verbose=verbose)

    if sort:
        db_filter.sort(sort)

    if verbose:
        print("Creating export folder...")
    export_path = folder_path.joinpath(folder_name)
    export_path = basic.make_new_dir(folder_path, export_path, attempt=50)

    conditionals_map = {}
    export_db.build_groups_map(db_filter, export_path, conditionals_map,
                               groups=groups, verbose=verbose)

    if verbose:
        print("Prepared query and path structure, beginning review export...")

    original_phams = db_filter.values
    total_g_data = {}
    for mapped_path in conditionals_map.keys():
        conditionals = conditionals_map[mapped_path]
        db_filter.values = original_phams
        db_filter.values = db_filter.build_values(where=conditionals)

        pf_data = get_pf_data(alchemist, db_filter, verbose=verbose)
        write_report(pf_data, mapped_path, PF_HEADER,
                     csv_name="FunctionReport",
                     verbose=verbose)

        if g_reports:
            execute_g_report_export(alchemist, db_filter, mapped_path,
                                    total_g_data=total_g_data,
                                    verbose=verbose)

        if s_report:
            execute_s_report_export(alchemist, db_filter, conditionals,
                                    mapped_path, verbose=verbose)
def execute_export(alchemist, output_path, output_name, values=None,
                   verbose=False, csv_export=False, ffile_export=None,
                   db_export=False, table="phage", filters=None, groups=None):
    """Executes the entirety of the file export pipeline.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param output_path: Path to a valid dir for new dir creation.
    :type output_path: Path
    :param output_name: A name for the export folder.
    :type output_name: str
    :param values: List of values to filter database results.
    :type values: list[str]
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    :param csv_export: A boolean value to toggle csv export.
    :type csv_export: bool
    :param ffile_export: A SeqIO supported file format to toggle
        flat-file export.
    :type ffile_export: str
    :param db_export: A boolean value to toggle database export.
    :type db_export: bool
    :param filters: A list of lists with filter values, grouped by ORs.
    :type filters: list[list[str]]
    :param groups: A list of supported MySQL column names to group by.
    :type groups: list[str]
    """
    # Fix: mutable default arguments ([]) replaced with None sentinels.
    if values is None:
        values = []
    if filters is None:
        filters = []
    if groups is None:
        groups = []

    if verbose:
        print("Retrieving database version...")
    db_version = mysqldb.get_version_table_data(alchemist.engine)

    if verbose:
        print("Creating export folder...")
    export_path = output_path.joinpath(output_name)
    export_path = basic.make_new_dir(output_path, export_path, attempt=50)

    if db_export:
        if verbose:
            print("Writing SQL database file...")
        write_database(alchemist, db_version["Version"], export_path)
    elif csv_export or ffile_export is not None:
        table_obj = alchemist.get_table(table)
        for column in table_obj.primary_key.columns:
            primary_key = column

        db_filter = Filter(alchemist=alchemist, key=primary_key)
        db_filter.values = values

        # `filter` renamed so it no longer shadows the builtin.
        for or_filters in filters:
            for filter_ in or_filters:
                db_filter.add(filter_)
        db_filter.update()

        # If filters were given but nothing passed, there is nothing to
        # export.
        if filters and not db_filter.values:
            return

        values_map = {}
        if groups:
            build_groups_map(db_filter, export_path, groups=groups,
                             values_map=values_map, verbose=verbose)
        else:
            values_map.update({export_path: db_filter.values})

        # Loop variable renamed so it no longer clobbers export_path.
        for group_path in values_map.keys():
            group_values = values_map[group_path]
            if csv_export:
                execute_csv_export(alchemist, group_path, table=table,
                                   values=group_values, verbose=verbose)
            elif ffile_export is not None:
                execute_ffx_export(alchemist, group_path, ffile_export,
                                   db_version, table=table,
                                   values=group_values, verbose=verbose)