def apply_filters(alchemist, table, filters, values=None, verbose=False):
    """Applies MySQL WHERE clause filters using a Filter.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param table: MySQL table name, used as the Filter key.
    :type table: str
    :param filters: A list of lists with filter values, grouped by ORs.
    :type filters: list[list[str]]
    :param values: Values assigned to the Filter before filters are added.
    :type values: list or None
    :param verbose: Accepted but not used by this function.
    :type verbose: bool
    :returns: filter-Loaded Filter object.
    :rtype: Filter
    :raises SystemExit: With exit code 1 when the filters cannot be added.
    """
    db_filter = Filter(alchemist=alchemist, key=table)
    db_filter.key = table
    db_filter.values = values

    try:
        db_filter.add(filters)
    except Exception:
        # Catch Exception rather than everything so Ctrl-C and explicit
        # exits are not misreported as a filter syntax problem.
        print("Please check your syntax for the conditional string: "
              f"{filters}")
        # Raised directly instead of calling the site-provided exit(),
        # which is not guaranteed to exist in every runtime.
        raise SystemExit(1)

    return db_filter
def get_cds_seqrecords(alchemist, values, data_cache=None, nucleotide=False,
                       verbose=False):
    """Builds one record per CDS feature returned for the given values.

    Each feature from parse_feature_data() is paired with its parent genome
    (looked up in the cache first, otherwise fetched with
    get_single_genome()), its gene domain rows are selected through a Filter
    keyed on 'gene.GeneID', and the three are handed to
    flat_files.cds_to_seqrecord().

    :param alchemist: Handler passed to the data-retrieval helpers and Filter.
    :param values: Forwarded to parse_feature_data() as ``values``.
    :param data_cache: Mapping of genome id to genome object; a new dict is
        used when None.
    :type data_cache: dict or None
    :param nucleotide: Accepted but not used by this function.
    :type nucleotide: bool
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    :returns: The objects returned by flat_files.cds_to_seqrecord(), in
        feature order.
    :rtype: list
    """
    cache = {} if data_cache is None else data_cache

    features = parse_feature_data(alchemist, values=values)

    # A single Filter is reused; only its values change per feature.
    domain_filter = Filter(alchemist)
    domain_filter.key = 'gene.GeneID'

    if verbose:
        print("...Converting SQL data...")

    records = []
    for feature in features:
        genome = cache.get(feature.genome_id)
        if genome is None:
            genome = get_single_genome(alchemist, feature.genome_id,
                                       data_cache=cache)

        feature.genome_length = genome.length
        feature.set_seqfeature()

        domain_filter.values = [feature.id]
        domains = domain_filter.select(CDD_DATA_COLUMNS)

        records.append(
            flat_files.cds_to_seqrecord(feature, genome,
                                        gene_domains=domains))

    return records
def get_acc_id_dict(alchemist):
    """Test helper that groups database phage IDs by accession.

    Builds a Filter keyed on "phage.PhageID", loads its values with
    build_values(), and returns the result of grouping them by
    "phage.Accession".

    :param alchemist: Handler used to construct the Filter.
    :returns: Whatever Filter.group() returns for "phage.Accession"
        (presumably a mapping of accession to phage IDs; confirm against
        Filter.group).
    """
    phage_filter = Filter(alchemist=alchemist)
    phage_filter.key = "phage.PhageID"
    phage_filter.values = phage_filter.build_values()

    return phage_filter.group("phage.Accession")
def execute_resubmit(alchemist, revisions_data_dicts, folder_path, folder_name,
                     filters="", groups=None, verbose=False):
    """Executes the entirety of the genbank resubmit pipeline.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param revisions_data_dicts: Data dictionaries containing pham/notes data.
        Each is read for its "Pham" and "Final Call" keys.
    :type revisions_data_dicts: list[dict]
    :param folder_path: Path to a valid dir for new dir creation.
    :type folder_path: Path
    :param folder_name: A name for the export folder.
    :type folder_name: str
    :param filters: Conditional string added to the Filter; "" adds none.
    :type filters: str
    :param groups: Column names forwarded to export_db.build_groups_map();
        None is treated as an empty list.
    :type groups: list[str]
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    """
    # A fresh list per call; a literal [] default would be shared by calls.
    if groups is None:
        groups = []

    db_filter = Filter(alchemist=alchemist)
    db_filter.key = "gene.PhamID"
    db_filter.add(BASE_CONDITIONALS)

    if filters != "":
        try:
            db_filter.add(filters)
        except Exception:
            # NOTE(review): execution continues after reporting the problem;
            # confirm whether the pipeline should abort here instead.
            print("Please check your syntax for the conditional string:\n"
                  f"{filters}")

    resubmit_columns = db_filter.get_columns(RESUBMIT_COLUMNS)

    db_filter.values = [data_dict["Pham"]
                        for data_dict in revisions_data_dicts]

    if verbose:
        print("Creating export folder...")
    export_path = folder_path.joinpath(folder_name)
    export_path = basic.make_new_dir(folder_path, export_path, attempt=50)

    # Filled in place by build_groups_map: export path -> conditionals.
    conditionals_map = {}
    export_db.build_groups_map(db_filter, export_path, conditionals_map,
                               groups=groups, verbose=verbose)

    if verbose:
        print("Prepared query and path structure, beginning review export...")

    for mapped_path, group_conditionals in conditionals_map.items():
        if verbose:
            print("Retreiving phage data for pham revisions...")

        export_dicts = []
        for data_dict in revisions_data_dicts:
            if verbose:
                print(f"...Retrieving data for pham {data_dict['Pham']}...")

            final_call = data_dict["Final Call"]
            if final_call == "Hypothetical Protein":
                final_call = ""

            # Work on a copy: appending to the mapped list itself would carry
            # every earlier pham's Notes clause into the later queries.
            conditionals = list(group_conditionals)
            conditionals.append(
                querying.build_where_clause(alchemist.graph,
                                            f"gene.Notes!={final_call}"))

            query = querying.build_select(alchemist.graph, resubmit_columns,
                                          where=conditionals)

            results = querying.execute(alchemist.engine, query,
                                       in_column=db_filter.key,
                                       values=[data_dict["Pham"]])

            for result in results:
                format_resubmit_data(result, data_dict["Final Call"])
                export_dicts.append(result)

        if not export_dicts:
            if verbose:
                # f-prefix added so the folder name is actually interpolated.
                print(f"'{mapped_path.name}' data selected for resubmision "
                      "matches selected call; no resubmision exported...")

            # Nothing to write for this group, so drop its directory.
            mapped_path.rmdir()
            continue

        export_dicts = sorted(export_dicts,
                              key=lambda export_dict: export_dict["Phage"])

        if verbose:
            print(f"Writing {CSV_NAME} in {mapped_path.name}...")
        file_path = mapped_path.joinpath(CSV_NAME)
        basic.export_data_dict(export_dicts, file_path, RESUBMIT_HEADER,
                               include_headers=True)
def execute_review(alchemist, folder_path, folder_name, review=True,
                   values=None, filters="", groups=None, sort=None,
                   g_reports=False, s_report=False, verbose=False):
    """Executes the entirety of the pham review pipeline.

    :param alchemist: A connected and fully built AlchemyHandler object.
    :type alchemist: AlchemyHandler
    :param folder_path: Path to a valid dir for new dir creation.
    :type folder_path: Path
    :param folder_name: A name for the export folder.
    :type folder_name: str
    :param review: A boolean to toggle filtering of phams by pham discrepancies.
    :type review: bool
    :param values: List of values to filter database results.
    :type values: list[str]
    :param filters: Conditional string added to the Filter; "" adds none.
    :type filters: str
    :param groups: A list of supported MySQL column names to group by.
    :type groups: list[str]
    :param sort: A list of supported MySQL column names to sort by.
    :type sort: list[str]
    :param g_reports: A boolean to toggle export of additional pham information.
    :type g_reports: bool
    :param s_report: A boolean to toggle the call to
        execute_s_report_export() for each export group.
    :type s_report: bool
    :param verbose: A boolean value to toggle progress print statements.
    :type verbose: bool
    :raises SystemExit: With exit code 1 when the filters cannot be added or
        when the Filter ends up with no values.
    """
    # values and sort are only truth-tested, so None behaves like the old
    # empty-list default; groups is forwarded and needs a real list.
    if groups is None:
        groups = []

    db_filter = Filter(alchemist=alchemist)
    db_filter.key = "gene.PhamID"

    if values:
        db_filter.values = values

        if verbose:
            print(f"Identified {len(values)} phams to review...")

    if filters != "":
        try:
            db_filter.add(filters)
        except Exception:
            print("Please check your syntax for the conditional string:\n"
                  f"{filters}")
            sys.exit(1)
        else:
            # Only resolve the user filters once they were accepted; there is
            # no point running the update while exiting on a syntax error.
            db_filter.update()

    # Clear the user filters (their effect is already folded into the values)
    # before applying the pipeline's base conditionals.
    db_filter._filters = []
    db_filter._updated = False
    db_filter._or_index = -1

    db_filter.add(BASE_CONDITIONALS)
    db_filter.update()

    if not db_filter.values:
        print("Current settings produced no database hits.")
        sys.exit(1)

    if review:
        review_phams(db_filter, verbose=verbose)

    if sort:
        db_filter.sort(sort)

    if verbose:
        print("Creating export folder...")
    export_path = folder_path.joinpath(folder_name)
    export_path = basic.make_new_dir(folder_path, export_path, attempt=50)

    # Filled in place by build_groups_map: export path -> conditionals.
    conditionals_map = {}
    export_db.build_groups_map(db_filter, export_path, conditionals_map,
                               groups=groups, verbose=verbose)

    if verbose:
        print("Prepared query and path structure, beginning review export...")

    original_phams = db_filter.values
    total_g_data = {}
    for mapped_path, conditionals in conditionals_map.items():
        # Start every group from the full pham list, then narrow it to the
        # phams matching this group's conditionals.
        db_filter.values = original_phams
        db_filter.values = db_filter.build_values(where=conditionals)

        pf_data = get_pf_data(alchemist, db_filter, verbose=verbose)
        write_report(pf_data, mapped_path, PF_HEADER,
                     csv_name="FunctionReport", verbose=verbose)

        if g_reports:
            execute_g_report_export(alchemist, db_filter, mapped_path,
                                    total_g_data=total_g_data,
                                    verbose=verbose)

        if s_report:
            execute_s_report_export(alchemist, db_filter, conditionals,
                                    mapped_path, verbose=verbose)