def parse_arguments():
    """Parse the command line for the hotspot-region BFS script and start logging.

    Besides parsing, this function has a side effect: it calls
    ``utils.start_logging`` with a log destination derived from the
    ``--log`` / ``--log-level`` options (see the inline comments below).

    Note: the ``--radius`` help text used to advertise "(Default: 10.0)"
    while the real default is 3.0.  The help now interpolates the actual
    default through argparse's ``%(default)s`` so the two cannot drift apart.
    The default value itself is unchanged.

    :return: dict built with ``vars()`` from the parsed namespace
        (argparse stores e.g. ``--pdb-info`` under the key ``pdb_info``)
    """
    info = ('Uses BFS to connect hotspot residues into connected regions '
            'within a structure.')
    parser = argparse.ArgumentParser(description=info)

    # program arguments
    parser.add_argument('-i', '--input',
                        type=str, required=True,
                        help='Output file from hotspot.py which has p-values for residues')
    parser.add_argument('-a', '--annotation-dir',
                        type=str, required=True,
                        help='Annotation directory from CRAVAT')
    parser.add_argument('-p', '--pdb-info',
                        type=str, required=True,
                        help='PDB information file (contains paths to PDBs)')
    parser.add_argument('-r', '--radius',
                        default=3.0, type=float,
                        help='Sphere radius in angstroms for connecting link '
                             'between two residues (Default: %(default)s)')
    parser.add_argument('-o', '--output',
                        default='output.txt', type=str,
                        help='Output result file for hotspot regions')
    parser.add_argument('-s', '--significance',
                        type=str, required=True,
                        help='File containing p-value thresholds for each tumour type')

    # logging arguments
    parser.add_argument('-ll', '--log-level',
                        type=str, action='store', default='',
                        help='Write a log file (--log-level=DEBUG for debug mode, '
                             '--log-level=INFO for info mode)')
    parser.add_argument('-l', '--log',
                        type=str, action='store', default='',
                        help='Path to log file. (accepts "stdout")')
    args = parser.parse_args()

    # handle logging
    if args.log:
        # an explicit log path always wins
        log_file = args.log
    elif args.log_level:
        # only a level was given: empty name -> auto-name the log file
        log_file = ''
    else:
        # no logging option at all: send the log to the null device
        log_file = os.devnull
    utils.start_logging(log_file=log_file, log_level=args.log_level)  # start logging

    return vars(args)
def parse_arguments():
    """Build the CLI parser for the BFS hotspot-region step, parse, and start logging.

    Side effect: ``utils.start_logging`` is called with the log file and log
    level selected from ``--log`` / ``--log-level``.

    :return: dict of parsed options obtained via ``vars()`` on the namespace
    """
    cli = argparse.ArgumentParser(
        description='Uses BFS to connect hotspot residues into connected regions.'
    )

    # program arguments
    cli.add_argument(
        '-m', '--multiple-testing',
        required=True,
        type=str,
        help='File that corrects for multiple testing',
    )
    cli.add_argument(
        '-a', '--annotation-dir',
        required=True,
        type=str,
        help='Annotation directory from CRAVAT',
    )
    cli.add_argument(
        '-p', '--pdb-info',
        required=True,
        type=str,
        help='PDB information file (contains paths to PDBs)',
    )
    cli.add_argument(
        '-r', '--radius',
        type=float,
        default=10.0,
        help='Sphere radius in angstroms for connecting link between '
             'two residues (Default: 10.0)',
    )
    cli.add_argument(
        '-q', '--q-value',
        type=float,
        default=.01,
        help='Q-value for FDR (Default: .01)',
    )
    cli.add_argument(
        '-o', '--output',
        type=str,
        default='output.txt',
        help='Output result file for hotspot regions',
    )

    # logging arguments
    cli.add_argument(
        '-ll', '--log-level',
        action='store',
        type=str,
        default='',
        help='Write a log file (--log-level=DEBUG for debug mode, '
             '--log-level=INFO for info mode)',
    )
    cli.add_argument(
        '-l', '--log',
        action='store',
        type=str,
        default='',
        help='Path to log file. (accepts "stdout")',
    )
    namespace = cli.parse_args()

    # handle logging: explicit path > auto-named file (level only) > null device
    if namespace.log:
        destination = namespace.log
    elif namespace.log_level:
        destination = ''  # auto-name the log file
    else:
        destination = os.devnull
    utils.start_logging(log_file=destination, log_level=namespace.log_level)

    return vars(namespace)
def parse_arguments():
    """Parse options for the tool that adds a PDB-path column, then start logging.

    Side effect: ``utils.start_logging`` is invoked with a log file chosen
    from ``--log`` / ``--log-level``.

    :return: dict of parsed options obtained via ``vars()`` on the namespace
    """
    description = 'Adds column for path to the correct PDB file'
    arg_parser = argparse.ArgumentParser(description=description)
    arg_parser.add_argument(
        '-p', '--pdb-info',
        required=True, type=str,
        help='PDB Info file from mupit_modbase',
    )
    arg_parser.add_argument(
        '-o', '--output',
        required=True, type=str,
        help='Updated PDB info file with path to PDBs',
    )

    # logging arguments
    arg_parser.add_argument(
        '-ll', '--log-level',
        default='', type=str, action='store',
        help='Write a log file (--log-level=DEBUG for debug mode, '
             '--log-level=INFO for info mode)',
    )
    arg_parser.add_argument(
        '-l', '--log',
        default='', type=str, action='store',
        help='Path to log file. (accepts "stdout")',
    )
    parsed = arg_parser.parse_args()

    # handle logging
    wants_logging = bool(parsed.log_level or parsed.log)
    if not wants_logging:
        target = os.devnull
    else:
        # an empty name means "auto-name the log file"
        target = parsed.log if parsed.log else ''
    level = parsed.log_level
    utils.start_logging(log_file=target, log_level=level)  # start logging

    options = vars(parsed)
    return options
def parse_arguments():
    """Define and parse the BFS hotspot-region options, then start logging.

    Side effect: calls ``utils.start_logging`` with the destination picked
    from ``--log`` / ``--log-level``.

    :return: dict of parsed options obtained via ``vars()`` on the namespace
    """
    summary = 'Uses BFS to connect hotspot residues into connected regions.'
    option_parser = argparse.ArgumentParser(description=summary)
    add = option_parser.add_argument

    # program arguments
    add('-m', '--multiple-testing', type=str, required=True,
        help='File that corrects for multiple testing')
    add('-a', '--annotation-dir', type=str, required=True,
        help='Annotation directory from CRAVAT')
    add('-p', '--pdb-info', type=str, required=True,
        help='PDB information file (contains paths to PDBs)')
    add('-r', '--radius', default=10.0, type=float,
        help='Sphere radius in angstroms for connecting link '
             'between two residues (Default: 10.0)')
    add('-q', '--q-value', default=.01, type=float,
        help='Q-value for FDR (Default: .01)')
    add('-o', '--output', default='output.txt', type=str,
        help='Output result file for hotspot regions')

    # logging arguments
    add('-ll', '--log-level', type=str, action='store', default='',
        help='Write a log file (--log-level=DEBUG for debug mode, '
             '--log-level=INFO for info mode)')
    add('-l', '--log', type=str, action='store', default='',
        help='Path to log file. (accepts "stdout")')

    parsed_args = option_parser.parse_args()

    # handle logging
    if not (parsed_args.log_level or parsed_args.log):
        # neither logging option supplied: discard log output
        chosen_log = os.devnull
    elif parsed_args.log:
        chosen_log = parsed_args.log
    else:
        chosen_log = ''  # auto-name the log file
    utils.start_logging(log_file=chosen_log,
                        log_level=parsed_args.log_level)  # start logging

    return vars(parsed_args)
def parse_arguments():
    """Parse the hotspot-detection command line and start logging.

    Side effect: ``utils.start_logging`` is called with a log file selected
    from ``--log`` / ``--log-level``.

    :return: dict of parsed options obtained via ``vars()`` on the namespace
    """
    cli = argparse.ArgumentParser(description='Detects hotspot protein regions')

    # program arguments
    cli.add_argument(
        '-m', '--mutations',
        required=True, type=str,
        help='Mutation counts for specific structures',
    )
    cli.add_argument(
        '-a', '--annotation',
        required=True, type=str,
        help='Annotations about PDB',
    )
    cli.add_argument(
        '-n', '--num-simulations',
        type=int, default=10000,
        help='Number of simulations (Default: 10000)',
    )
    cli.add_argument(
        '-r', '--radius',
        type=float, default=10.0,
        help='Sphere radius in angstroms (Default: 10.0)',
    )
    cli.add_argument(
        '-s', '--seed',
        type=int, default=101,
        help='Random number generator seed (Default: 101)',
    )
    cli.add_argument(
        '-sc', '--stop-criterion',
        type=int, default=200,
        help='Number of simulations exceeding the maximum observed residue '
             'before stopping. This speeds computation by spending less time '
             'on non-significant structures. (Default: 200)',
    )
    cli.add_argument(
        '-t', '--tumor-type',
        default='EVERY', type=str,
        help='Perform analysis for only specific tumor type '
             '(Default: "EVERY" = each tumor type)',
    )
    cli.add_argument(
        '-e', '--error-pdb',
        default=None, type=str,
        help='File containing structures that have badly formated pdb files',
    )
    cli.add_argument(
        '-o', '--output',
        type=str, default='output.txt',
        help='Output result file of hotspots',
    )

    # logging arguments
    cli.add_argument(
        '-ll', '--log-level',
        default='', action='store', type=str,
        help='Write a log file (--log-level=DEBUG for debug mode, '
             '--log-level=INFO for info mode)',
    )
    cli.add_argument(
        '-l', '--log',
        default='', action='store', type=str,
        help='Path to log file. (accepts "stdout")',
    )
    namespace = cli.parse_args()

    # handle logging: explicit path > auto-named file (level only) > null device
    if namespace.log:
        log_target = namespace.log
    elif namespace.log_level:
        log_target = ''  # auto-name the log file
    else:
        log_target = os.devnull
    utils.start_logging(log_file=log_target, log_level=namespace.log_level)

    return vars(namespace)
def parse_arguments():
    """Parse the hotspot-detection options (declared as a table) and start logging.

    The options are listed in ``option_table`` in the order they are
    registered with argparse.  Side effect: ``utils.start_logging`` is called
    with a log file selected from ``--log`` / ``--log-level``.

    :return: dict of parsed options obtained via ``vars()`` on the namespace
    """
    option_table = [
        # program arguments
        (('-m', '--mutations'),
         dict(type=str, required=True,
              help='Mutation counts for specific structures')),
        (('-a', '--annotation'),
         dict(type=str, required=True,
              help='Annotations about PDB')),
        (('-n', '--num-simulations'),
         dict(default=10000, type=int,
              help='Number of simulations (Default: 10000)')),
        (('-r', '--radius'),
         dict(default=10.0, type=float,
              help='Sphere radius in angstroms (Default: 10.0)')),
        (('-s', '--seed'),
         dict(default=None, type=int,
              help='Random number generator seed (Default: automatic)')),
        (('-sc', '--stop-criterion'),
         dict(default=200, type=int,
              help='Number of simulations exceeding the maximum observed '
                   'residue before stopping. This speeds computation by '
                   'spending less time on non-significant structures. '
                   '(Default: 200)')),
        (('-t', '--tumor-type'),
         dict(type=str, default='EVERY',
              help='Perform analysis for only specific tumor type '
                   '(Default: "EVERY" = each tumor type)')),
        (('-e', '--error-pdb'),
         dict(type=str, default=None,
              help='File containing structures that have badly formated pdb files')),
        (('-o', '--output'),
         dict(default='output.txt', type=str,
              help='Output result file of hotspots')),
        # logging arguments
        (('-ll', '--log-level'),
         dict(type=str, action='store', default='',
              help='Write a log file (--log-level=DEBUG for debug mode, '
                   '--log-level=INFO for info mode)')),
        (('-l', '--log'),
         dict(type=str, action='store', default='',
              help='Path to log file. (accepts "stdout")')),
    ]

    parser = argparse.ArgumentParser(description='Detects hotspot protein regions')
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    parsed = parser.parse_args()

    # handle logging
    if not (parsed.log_level or parsed.log):
        log_path = os.devnull
    else:
        # an empty name means "auto-name the log file"
        log_path = parsed.log if parsed.log else ''
    utils.start_logging(log_file=log_path, log_level=parsed.log_level)  # start logging

    return vars(parsed)
:param df: :return: ''' # First drop duplicated indexes return df.groupby(df.index).first() #TODO: Write unit test for this function def merge_and_fill_gaps(df, left_column, right_column): logging.info('filling holes') pre_merge_zeros = count_empty_rows(df, column=left_column) logging.debug(f'Rows with 0 or NaN prior to merge {pre_merge_zeros}') df[left_column] = df.apply(lambda x: max(x[left_column], x[right_column]), axis=1) post_merge_zeros = count_empty_rows(df, column=left_column) logging.debug(f'Rows with 0 or NaN post to merge {post_merge_zeros}') return df def count_empty_rows(df, column): return (df[column].isna().sum()) + (df[column] == 0).sum() if __name__ == '__main__': start_logging(debug_to_console=True) COMMON_REPLACEMENTS = { 'United States of America': 'USA', 'United States': 'US' }