def main(argv):
    """Validate the command-line options and annotate a clinical data file.

    Args:
        argv: Parsed options object. The attributes read here are ``help``,
            ``sample_ids_filter``, ``annotated_alteration_files`` (a
            comma-separated string), ``input_file`` and ``output_file``.

    The function ends the process through ``sys.exit``: without a status
    after logging the usage text when ``argv.help`` is set, and with status 2
    after logging the offending flags when ``-i``, ``-o`` or ``-a`` is empty.
    """
    if argv.help:
        log.info(
            '\n'
            'ClinicalDataAnnotator.py -i <input clinical file> -o <output clinical file> -a <annotated alteration files, separate by ,> [-s sample list filter]\n'
            ' Essential clinical columns:\n'
            ' SAMPLE_ID: sample ID')
        sys.exit()

    if argv.sample_ids_filter:
        setsampleidsfileterfile(argv.sample_ids_filter)

    # Split on commas and trim each entry. The previous pattern ',|, ' could
    # never match its ', ' alternative (the bare ',' always wins first), so
    # "a, b" produced ' b' with a leading space. Dropping empty entries also
    # makes the '-a' emptiness check below reachable: re.split('') returns
    # [''], whose length is never 0.
    raw_files = argv.annotated_alteration_files or ''
    annotated_alteration_files = [
        name.strip() for name in raw_files.split(',') if name.strip()
    ]

    required_params = []
    if argv.input_file == '':
        required_params.append('-i')
    if argv.output_file == '':
        required_params.append('-o')
    if not annotated_alteration_files:
        required_params.append('-a')
    if required_params:
        log.error('The parameter(s) ' + ', '.join(required_params) + ' can not be empty')
        log.info('for help: python ClinicalDataAnnotator.py -h')
        sys.exit(2)

    log.info('annotating %s ...' % argv.input_file)
    process_clinical_data(annotated_alteration_files, argv.input_file, argv.output_file)
    log.info('done!')
def main(argv):
    """Validate the command-line options and render the OncoKB plots to a file.

    Args:
        argv: Parsed options object. The attributes read here are
            ``catogery_column``, ``threshold_cat``, ``help``, ``input_file``,
            ``output_file``, ``sample_ids_filter`` and ``levels`` (a
            comma-separated string).

    The function ends the process through ``sys.exit``: without a status
    after logging the usage text when ``argv.help`` is set, and with status 2
    after logging the offending flags when ``-i`` or ``-o`` is empty.
    Otherwise a three-row figure is built and saved to ``argv.output_file``.
    """
    params = {
        "catogerycolumn": argv.catogery_column,  # -c
        "thresholdcat": argv.threshold_cat,  # -n
    }

    if argv.help:
        log.info(
            '\n'
            'OncoKBPlots.py -i <annotated clinical file> -o <output PDF file> [-c <categorization column, '
            'e.g. CANCER_TYPE>] [-s sample list filter] [-n threshold of # samples in a category] [-l comma separated levels to include]\n'
            ' Essential clinical columns:\n'
            ' SAMPLE_ID: sample ID\n'
            ' HIGHEST_LEVEL: Highest OncoKB levels\n'
            ' Supported levels (-l): \n'
            ' LEVEL_1,LEVEL_2,LEVEL_3A,LEVEL_3B,LEVEL_4,ONCOGENIC,VUS')
        sys.exit()

    if argv.input_file == '' or argv.output_file == '':
        required_params = []
        if argv.input_file == '':
            required_params.append('-i')
        if argv.output_file == '':
            required_params.append('-o')
        log.error('The parameter(s) ' + ', '.join(required_params) + ' can not be empty')
        log.info('for help: python OncoKBPlots.py -h')
        sys.exit(2)

    if argv.sample_ids_filter:
        setsampleidsfileterfile(argv.sample_ids_filter)
    if argv.levels:
        params["levels"] = re.split(',', argv.levels)

    log.info('annotating %s ...' % argv.input_file)

    # One axis per plot, stacked vertically.
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1)

    # Every call below reads the options from the ``argv`` parameter. The
    # earlier code mixed in a name ``args`` that is not defined in this
    # function, so it only worked when a module-level ``args`` existed.
    plotclinicalactionability(ax1, argv.input_file, argv.output_file, params)
    # NOTE(review): the actionability plot is drawn on ax1 a second time, as
    # in the previous revision. This looks redundant; confirm against
    # plotclinicalactionability before removing it.
    plotclinicalactionability(ax1, argv.input_file, argv.output_file, params)
    plotimplications(ax2, 'HIGHEST_DX_LEVEL', 'OncoKB Diagnostic Implications', dxLevels,
                     argv.input_file, argv.output_file, params)
    plotimplications(ax3, 'HIGHEST_PX_LEVEL', 'OncoKB Prognostic Implications', pxLevels,
                     argv.input_file, argv.output_file, params)

    plt.subplots_adjust(left=0.2, bottom=0.3)
    plt.gcf().text(0.90, 0.1, "Generated by OncoKB\n[Chakravarty et al., JCO PO 2017]", fontsize=6,
                   horizontalalignment='right', verticalalignment='bottom')
    fig.tight_layout()
    fig.savefig(argv.output_file, bbox_inches='tight')
    log.info('done!')
def main(argv):
    """Check the fusion annotator options, configure the helpers and annotate.

    Args:
        argv: Parsed options object. The attributes read here are ``help``,
            ``input_file``, ``output_file``, ``oncokb_api_bearer_token``,
            ``sample_ids_filter``, ``cancer_hotspots_base_url``,
            ``oncokb_api_url``, ``input_clinical_file``,
            ``previous_result_file``, ``default_cancer_type`` and
            ``structural_variant_name_format``.

    The function ends the process through ``sys.exit``: without a status
    after logging the usage text when ``argv.help`` is set, and with status 2
    after logging the offending flags when ``-i``, ``-o`` or ``-b`` is empty.
    """
    if argv.help:
        usage = (
            '\n'
            "FusionAnnotator.py -i <input Fusion file> -o <output Fusion file> [-p previous results] [-c <input clinical file>] [-s sample list filter] [-t <default tumor type>] [-u <oncokb api url>] [-b <oncokb api bear token>] [-r <structural variant name format, default: [A-Za-z\\d]+-[A-Za-z\\d]+>]\n"
            ' Essential Fusion columns (case insensitive):\n'
            ' HUGO_SYMBOL: Hugo gene symbol\n'
            ' VARIANT_CLASSIFICATION: Translational effect of variant allele\n'
            ' TUMOR_SAMPLE_BARCODE: sample ID\n'
            ' FUSION: amino acid change, e.g. "TMPRSS2-ERG"\n'
            ' Essential clinical columns:\n'
            ' SAMPLE_ID: sample ID\n'
            ' ONCOTREE_CODE: tumor type code from oncotree (oncotree.mskcc.org)\n'
            ' Cancer type will be assigned based on the following priority:\n'
            ' 1) ONCOTREE_CODE in clinical data file\n'
            ' 2) ONCOTREE_CODE exist in Fusion\n'
            ' 3) default tumor type (-t)\n'
            ' Default OncoKB base url is https://www.oncokb.org'
        )
        log.info(usage)
        sys.exit()

    # Flags of the mandatory options that were left as empty strings,
    # listed in the order -i, -o, -b.
    mandatory = (
        ('-i', argv.input_file),
        ('-o', argv.output_file),
        ('-b', argv.oncokb_api_bearer_token),
    )
    missing_flags = [flag for flag, value in mandatory if value == '']
    if missing_flags:
        log.error('The parameter(s) ' + ', '.join(missing_flags) + ' can not be empty')
        log.info('for help: python FusionAnnotator.py -h')
        sys.exit(2)

    # Optional settings are forwarded only when a truthy value was supplied.
    sample_filter = argv.sample_ids_filter
    if sample_filter:
        setsampleidsfileterfile(sample_filter)
    hotspots_url = argv.cancer_hotspots_base_url
    if hotspots_url:
        setcancerhotspotsbaseurl(hotspots_url)
    api_url = argv.oncokb_api_url
    if api_url:
        setoncokbbaseurl(api_url)
    setoncokbapitoken(argv.oncokb_api_bearer_token)

    # The map starts empty and is handed to readCancerTypes when a clinical
    # file is given (presumably filled in place -- confirm in readCancerTypes).
    cancertypemap = {}
    clinical_file = argv.input_clinical_file
    if clinical_file:
        readCancerTypes(clinical_file, cancertypemap)

    validate_oncokb_token()

    log.info('annotating %s ...' % argv.input_file)
    process_fusion(
        argv.input_file,
        argv.output_file,
        argv.previous_result_file,
        argv.default_cancer_type,
        cancertypemap,
        argv.structural_variant_name_format,
    )
    log.info('done!')
def main(argv):
    """Check the structural variant annotator options, configure and annotate.

    Args:
        argv: Parsed options object. The attributes read here are ``help``,
            ``input_file``, ``output_file``, ``oncokb_api_bearer_token``,
            ``sample_ids_filter``, ``cancer_hotspots_base_url``,
            ``oncokb_api_url``, ``input_clinical_file``,
            ``previous_result_file`` and ``default_cancer_type``.

    The function ends the process through ``sys.exit``: without a status
    after logging the usage text when ``argv.help`` is set, and with status 2
    after logging the offending flags when ``-i``, ``-o`` or ``-b`` is empty.
    """
    if argv.help:
        usage = (
            '\n'
            'StructuralVariantAnnotator.py -i <input structural variant file> -o <output structural variant file> [-p previous results] [-c <input clinical file>] [-s sample list filter] [-t <default tumor type>] [-u <oncokb api url>] [-b <oncokb api bear token>]\n'
            ' Essential structural variant columns (case insensitive):\n'
            ' GENEA: Hugo gene symbol for gene A\n'
            ' GENEB: Hugo gene symbol for gene B\n'
            ' SV_TYPE: Structural variant type. Available values: DELETION, TRANSLOCATION, DUPLICATION, INSERTION, INVERSION, FUSION, UNKNOWN. Other type will be converted to UNKNOWN\n'
            ' TUMOR_SAMPLE_BARCODE: sample ID\n'
            ' Essential clinical columns:\n'
            ' SAMPLE_ID: sample ID\n'
            ' ONCOTREE_CODE: tumor type code from oncotree (oncotree.mskcc.org)\n'
            ' Cancer type will be assigned based on the following priority:\n'
            ' 1) ONCOTREE_CODE in clinical data file\n'
            ' 2) ONCOTREE_CODE exist in structural variant\n'
            ' 3) default tumor type (-t)\n'
            ' Default OncoKB base url is https://www.oncokb.org'
        )
        log.info(usage)
        sys.exit()

    # Flags of the mandatory options that were left as empty strings,
    # listed in the order -i, -o, -b.
    mandatory = (
        ('-i', argv.input_file),
        ('-o', argv.output_file),
        ('-b', argv.oncokb_api_bearer_token),
    )
    missing_flags = [flag for flag, value in mandatory if value == '']
    if missing_flags:
        log.error('The parameter(s) ' + ', '.join(missing_flags) + ' can not be empty')
        log.info('for help: python StructuralVariantAnnotator.py -h')
        sys.exit(2)

    # Optional settings are forwarded only when a truthy value was supplied.
    sample_filter = argv.sample_ids_filter
    if sample_filter:
        setsampleidsfileterfile(sample_filter)
    hotspots_url = argv.cancer_hotspots_base_url
    if hotspots_url:
        setcancerhotspotsbaseurl(hotspots_url)
    api_url = argv.oncokb_api_url
    if api_url:
        setoncokbbaseurl(api_url)
    setoncokbapitoken(argv.oncokb_api_bearer_token)

    # The map starts empty and is handed to readCancerTypes when a clinical
    # file is given (presumably filled in place -- confirm in readCancerTypes).
    cancertypemap = {}
    clinical_file = argv.input_clinical_file
    if clinical_file:
        readCancerTypes(clinical_file, cancertypemap)

    validate_oncokb_token()

    log.info('annotating %s ...' % argv.input_file)
    process_sv(
        argv.input_file,
        argv.output_file,
        argv.previous_result_file,
        argv.default_cancer_type,
        cancertypemap,
    )
    log.info('done!')
def main(argv):
    """Check the CNA annotator options, configure the helpers and annotate.

    Args:
        argv: Parsed options object. The attributes read here are ``help``,
            ``input_file``, ``output_file``, ``oncokb_api_bearer_token``,
            ``sample_ids_filter``, ``oncokb_api_url``,
            ``input_clinical_file``, ``previous_result_file``,
            ``default_cancer_type``, ``annotate_gain_loss`` and
            ``cna_file_format`` (lower-cased before it is passed on).

    The function ends the process through ``sys.exit``: without a status
    after logging the usage text when ``argv.help`` is set, and with status 2
    after logging the offending flags when ``-i``, ``-o`` or ``-b`` is empty.
    """
    if argv.help:
        usage = (
            '\n'
            'CnaAnnotator.py -i <input CNA file> -o <output CNA file> [-p previous results] [-c <input clinical file>] [-s sample list filter] [-t <default tumor type>] [-u oncokb-base-url] [-b oncokb_api_bear_token] [-z annotate_gain_loss] [-f CNA file formt, gistic or individual]\n'
            ' Input CNA file uses GISTIC output by default (https://docs.cbioportal.org/5.1-data-loading/data-loading/file-formats#data-file-1). You can also list copy number alteration individually by specifying -f=individual\n'
            ' Essential clinical columns:\n'
            ' SAMPLE_ID: sample ID\n'
            ' Cancer type will be assigned based on the following priority:\n'
            ' 1) ONCOTREE_CODE in clinical data file\n'
            ' 2) ONCOTREE_CODE exist in MAF\n'
            ' 3) default tumor type (-t)\n'
            ' We do not annotate Gain and Loss by default, add -z to include the analysis. See https://github.com/oncokb/oncokb-annotator/issues/51 for more information.\n'
            ' Default OncoKB base url is https://www.oncokb.org'
        )
        log.info(usage)
        sys.exit()

    # Flags of the mandatory options that were left as empty strings,
    # listed in the order -i, -o, -b.
    mandatory = (
        ('-i', argv.input_file),
        ('-o', argv.output_file),
        ('-b', argv.oncokb_api_bearer_token),
    )
    missing_flags = [flag for flag, value in mandatory if value == '']
    if missing_flags:
        log.error('The parameter(s) ' + ', '.join(missing_flags) + ' can not be empty')
        log.info('for help: python CnaAnnotator.py -h')
        sys.exit(2)

    # Optional settings are forwarded only when a truthy value was supplied.
    sample_filter = argv.sample_ids_filter
    if sample_filter:
        setsampleidsfileterfile(sample_filter)
    api_url = argv.oncokb_api_url
    if api_url:
        setoncokbbaseurl(api_url)
    setoncokbapitoken(argv.oncokb_api_bearer_token)

    # The map starts empty and is handed to readCancerTypes when a clinical
    # file is given (presumably filled in place -- confirm in readCancerTypes).
    cancertypemap = {}
    clinical_file = argv.input_clinical_file
    if clinical_file:
        readCancerTypes(clinical_file, cancertypemap)

    validate_oncokb_token()

    log.info('annotating %s ...' % argv.input_file)
    process_cna_data(
        argv.input_file,
        argv.output_file,
        argv.previous_result_file,
        argv.default_cancer_type,
        cancertypemap,
        argv.annotate_gain_loss,
        argv.cna_file_format.lower(),
    )
    log.info('done!')
def main(argv):
    """Check the MAF annotator options, configure the helpers and annotate.

    Args:
        argv: Parsed options object. The attributes read here are ``help``,
            ``input_file``, ``output_file``, ``oncokb_api_bearer_token``,
            ``sample_ids_filter``, ``cancer_hotspots_base_url``,
            ``oncokb_api_url``, ``input_clinical_file``, ``query_type``,
            ``default_reference_genome``, ``previous_result_file``,
            ``default_cancer_type`` and ``annotate_hotspots``.

    Raises:
        KeyError: Re-raised after an error is logged when the upper-cased
            ``query_type`` is not a key of ``QueryType`` or the upper-cased
            ``default_reference_genome`` is not a key of ``ReferenceGenome``.

    The function ends the process through ``sys.exit``: without a status
    after logging the usage text when ``argv.help`` is set, and with status 2
    after logging the offending flags when ``-i``, ``-o`` or ``-b`` is empty.
    """
    if argv.help:
        usage = (
            '\n'
            'MafAnnotator.py -i <input MAF file> -o <output MAF file> [-p previous results] [-c <input clinical file>] '
            '[-s sample list filter] [-t <default tumor type>] [-u oncokb-base-url] [-b oncokb api bear token] [-a] [-q query type] [-r default reference genome]\n'
            'For definitions of the MAF format, please see https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/\n\n'
            'Essential MAF columns for querying HGVSp_Short and HGVSp(case insensitive):\n'
            ' Hugo_Symbol: Hugo gene symbol\n'
            ' Tumor_Sample_Barcode: sample ID\n'
            ' HGVSp(query type: HGVSp): protein change in HGVSp format\n'
            ' HGVSp_Short(query type: HGVSp_Short): protein change in HGVSp format using 1-letter amino-acid codes\n'
            'Essential MAF columns for querying HGVSg(case insensitive):\n'
            ' Tumor_Sample_Barcode: sample ID\n'
            ' HGVSg: Genomic change in HGVSg format\n'
            'Essential MAF columns for querying genomic change(case insensitive):\n'
            ' Tumor_Sample_Barcode: sample ID\n'
            ' Chromosome: Chromosome number\n'
            ' Start_Position: Mutation start coordinate\n'
            ' End_Position: Mutation end coordinate\n'
            ' Reference_Allele: The plus strand reference allele at this position\n'
            ' Tumor_Seq_Allele1: Primary data genotype for tumor sequencing (discovery) allele\n'
            ' Tumor_Seq_Allele2: Tumor sequencing (discovery) allele 2\n'
            'Essential clinical columns:\n'
            ' SAMPLE_ID: sample ID\n'
            ' ONCOTREE_CODE: tumor type code from oncotree (http://oncotree.mskcc.org)\n'
            'Cancer type will be assigned based on the following priority:\n'
            ' 1) ONCOTREE_CODE in clinical data file\n'
            ' 2) ONCOTREE_CODE exist in MAF\n'
            ' 3) default tumor type (-t)\n'
            'Query type only allows the following values (case-insensitive):\n'
            ' - HGVSp_Short\n'
            ' It reads from column HGVSp_Short or Alteration\n'
            ' - HGVSp\n'
            ' It reads from column HGVSp or Alteration\n'
            ' - HGVSg\n'
            ' It reads from column HGVSg or Alteration\n'
            ' - Genomic_Change\n'
            ' It reads from columns Chromosome, Start_Position, End_Position, Reference_Allele, Tumor_Seq_Allele1 and Tumor_Seq_Allele2 \n'
            'Reference Genome only allows the following values(case-insensitive):\n'
            ' - GRCh37\n'
            ' GRCh38\n'
            'Default OncoKB base url is https://www.oncokb.org.\n'
        )
        log.info(usage)
        sys.exit()

    # Flags of the mandatory options that were left as empty strings,
    # listed in the order -i, -o, -b.
    mandatory = (
        ('-i', argv.input_file),
        ('-o', argv.output_file),
        ('-b', argv.oncokb_api_bearer_token),
    )
    missing_flags = [flag for flag, value in mandatory if value == '']
    if missing_flags:
        log.error('The parameter(s) ' + ', '.join(missing_flags) + ' can not be empty')
        log.info('For help: python MafAnnotator.py -h')
        sys.exit(2)

    # Optional settings are forwarded only when a truthy value was supplied.
    sample_filter = argv.sample_ids_filter
    if sample_filter:
        setsampleidsfileterfile(sample_filter)
    hotspots_url = argv.cancer_hotspots_base_url
    if hotspots_url:
        setcancerhotspotsbaseurl(hotspots_url)
    api_url = argv.oncokb_api_url
    if api_url:
        setoncokbbaseurl(api_url)
    setoncokbapitoken(argv.oncokb_api_bearer_token)

    # The map starts empty and is handed to readCancerTypes when a clinical
    # file is given (presumably filled in place -- confirm in readCancerTypes).
    cancertypemap = {}
    clinical_file = argv.input_clinical_file
    if clinical_file:
        readCancerTypes(clinical_file, cancertypemap)

    log.info('annotating %s ...' % argv.input_file)

    # Both lookups are case-insensitive: the user value is upper-cased before
    # it is used as a key. An unknown key is logged and then re-raised.
    user_input_query_type = None
    requested_query_type = argv.query_type
    if requested_query_type is not None:
        try:
            user_input_query_type = QueryType[requested_query_type.upper()]
        except KeyError:
            log.error(
                'Query type is not acceptable. Only the following allows(case insensitive): HGVSp_Short, HGVSp, HGVSg, Genomic_Change')
            raise

    default_reference_genome = None
    requested_genome = argv.default_reference_genome
    if requested_genome is not None:
        try:
            default_reference_genome = ReferenceGenome[requested_genome.upper()]
        except KeyError:
            log.error(
                'Reference genome is not acceptable. Only the following allows(case insensitive): GRCh37, GRCh38')
            raise

    validate_oncokb_token()

    processalterationevents(
        argv.input_file,
        argv.output_file,
        argv.previous_result_file,
        argv.default_cancer_type,
        cancertypemap,
        argv.annotate_hotspots,
        user_input_query_type,
        default_reference_genome,
    )
    log.info('done!')