Exemple #1
0
def supply_args():
    """
    Define the command-line interface and return the parsed arguments.

    Two positionals are expected (input VCF, output VCF).  The optional
    --evf, --vf, --ccds_evf and --ccds_vf options each name an Annovar
    file, and --version reports the program version.

    Raises SyntaxError when neither --evf nor --vf was supplied.
    https://docs.python.org/2.7/library/argparse.html
    """
    parser = argparse.ArgumentParser(description='')

    # Positional arguments, in the order they appear on the command line.
    parser.add_argument(
        'infile', help='Input VCF to apply Annovar annotations to.')
    parser.add_argument('outfile', help='Output VCF')

    # Optional Annovar files: (flag, help text), registered in this order.
    annovar_options = (
        ('--evf', 'Input exonic_variant_function Annovar file.'),
        ('--vf', 'Input variant_function Annovar file.'),
        ('--ccds_evf', 'Input CCDS exonic_variant_function Annovar file.'),
        ('--ccds_vf', 'Input CCDS variant_function Annovar file.'),
    )
    for flag, description in annovar_options:
        parser.add_argument(flag, help=description)

    parser.add_argument(
        '--version', action='version', version='%(prog)s ' + VERSION)

    parsed = parser.parse_args()

    # At least one of the two primary Annovar files is mandatory.
    if not (parsed.evf or parsed.vf):
        raise SyntaxError("Must specify either vf or evf, or both.")

    return parsed
def parse_args():
    """
    Description:
        function 'parse_args' builds the parser for the BRCA fill-in script,
        parses the command line and returns the resulting argparse namespace.
        The genome / refseq / datatable inputs are opened for reading by
        argparse, 'outBRCA' is opened for writing, 'artifacts_dir' is kept as
        a string and 'calcProtein' is True only when -p is given (False
        otherwise). Options that are not supplied are left as None.
    """
    parser = argparse.ArgumentParser(
        description='Fill in hg18, hg19 genomic coordinates and cDNA hgvs strings in merged BRCA variant dataset.')

    # (short flag, long flag, help text) for every input opened read-only,
    # listed in the order they must show up in the help output.
    readable_inputs = (
        ('-i', '--inBRCA',
         'Input ENIGMA BRCA datatable file for conversion.'),
        ('-j', '--inHg18', 'Input hg18 reference genome fasta file.'),
        ('-k', '--inHg19', 'Input hg19 reference genome fasta file.'),
        ('-l', '--inHg38', 'Input hg38 reference genome fasta file.'),
        ('-r', '--inRefSeq18',
         'Input refseq annotation hg18-based genepred file.'),
        ('-s', '--inRefSeq19',
         'Input refseq annotation hg19-based genepred file.'),
        ('-t', '--inRefSeq38',
         'Input refseq annotation hg38-based genepred file.'),
    )
    for short_flag, long_flag, help_text in readable_inputs:
        parser.add_argument(short_flag, long_flag,
                            type=argparse.FileType('r'),
                            help=help_text)

    parser.add_argument(
        '-p', '--calcProtein',
        dest='calcProtein',
        action='store_true',
        default=False,
        help='Set flag for hgvs protein fill-in. May not result in complete fill-in.')
    parser.add_argument(
        '-o', '--outBRCA',
        type=argparse.FileType('w'),
        help='Output filled in ENIGMA BRCA datatable file.')
    parser.add_argument(
        '--artifacts_dir',
        help='Artifacts directory with pipeline artifact files.')

    return parser.parse_args()
Exemple #3
0
def parse_args():
    """
    Description:
        function 'parse_args' parses arguments from command-line and returns an argparse
        object containing the arguments and their values. All four options
        (inVCF, outVCF, refFASTA, refSEQ) are plain strings; an option that is
        not listed in the command is left as None.
    """
    # The summary must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing the text
    # positionally replaced the script name in the usage line.
    parser = argparse.ArgumentParser(
        description=
        'Input a bgzip compressed and tabix indexed vcf and output hgvs normalized vcf filename.'
    )
    parser.add_argument(
        '-i',
        '--inVCF',
        type=str,
        help='Input bgzip compressed and tabix indexed vcf filepath.')
    parser.add_argument('-o',
                        '--outVCF',
                        type=str,
                        help='Output hgvs-normalized VCF filename.')
    parser.add_argument(
        '-r',
        '--refFASTA',
        type=str,
        help='Input FASTA format reference filename. ex: "hg38.p12.fa"')
    parser.add_argument(
        '-g',
        '--refSEQ',
        type=str,
        help='Input GenePred format refSeq filename. ex: "ncbiRefSeq.txt"')

    options = parser.parse_args()
    return options
def main():
    """
    Convert a tab-separated input table into the OUTPUT_COLUMNS layout.

    The first input line is treated as the header: spaces in column names
    are replaced by underscores and the positions of COLUMNS_TO_SAVE are
    looked up.  For every following line the date, OMIM id and HGVS fields
    are cleaned up, the cDNA HGVS string is converted with convert_HGVS, and
    one output line is written consisting of the saved columns with the
    genomic coordinate inserted at position 1 and the protein HGVS appended.

    Side effects: sets the module-level REFGENE from --reference_genome and
    prints the reference sequence / HGVS / combined string for each row.

    Raises: whatever convert_HGVS raised, when the conversion fails for a
    row whose HGVS field does not contain ';'.
    """
    global REFGENE

    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--readable_input',
                        help='readable input file for conversion.')
    parser.add_argument('-o', '--writable_output',
                        help='writable output file for conversion.')
    parser.add_argument('-g', '--genome_path', help='Link to hg38.fa.')
    parser.add_argument('-r', '--reference_genome', default='./hg38.BRCA.refGene.txt',
                        help='Link to hg38.BRCA.refgene.txt.')

    args = parser.parse_args()
    GENOME = SequenceFileDB(args.genome_path)
    REFGENE = args.reference_genome

    # Context managers guarantee both files are closed even if a row fails.
    with open(args.readable_input, "r") as f_in, \
            open(args.writable_output, "w") as f_out:
        f_out.write("\t".join(OUTPUT_COLUMNS) + "\n")
        for index, line in enumerate(f_in):
            #
            # Clean the line by removing leading or trailing spaces adjacent to tabs.
            #
            line = re.sub("( )*\t( )*", "\t", line)
            # NOTE(review): a numpy string array is fixed-width, so a
            # replacement value longer than the widest field on the row may
            # be truncated on assignment - confirm this cannot happen.
            items = np.array(line.rstrip().split("\t"))
            if index == 0:
                # Handle column names
                columns = np.array([i.replace(" ", "_") for i in items])
                index_to_save = [np.where(columns == i)[0][0] for i in COLUMNS_TO_SAVE]
                column_idx = dict(zip(COLUMNS_TO_SAVE, index_to_save))
                continue
            #
            # In the date last evaluated field, delete the time last evaluated if provided.
            #
            date_last_evaluated_idx = column_idx["Date_last_evaluated"]
            items[date_last_evaluated_idx] = items[date_last_evaluated_idx].split(' ')[0]
            OMIM_id_index = column_idx["Condition_ID_value"]
            items[OMIM_id_index] = convert_OMIM_id(items[OMIM_id_index])
            ref_seq_idx = column_idx["Reference_sequence"]
            hgvs_idx = column_idx["HGVS"]
            items[hgvs_idx] = cleanup_HGVS(items[ref_seq_idx], items[hgvs_idx], HP, EVM)
            HGVS_cDNA = items[ref_seq_idx] + ":" + items[hgvs_idx]
            # Single formatted string: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s %s" % (items[ref_seq_idx], items[hgvs_idx], HGVS_cDNA))
            try:
                genome_coor, HGVS_p = convert_HGVS(HGVS_cDNA, GENOME)
            except Exception:
                # Only HGVS strings containing ';' get the None filler.
                # Any other failure is re-raised: previously the values of
                # the preceding row were silently reused (or a NameError was
                # raised on the first data row).
                if ";" not in items[hgvs_idx]:
                    raise
                genome_coor, HGVS_p = create_None_filler()
            aa_abrev_index = column_idx["Abbrev_AA_change"]
            # Fill in the abbreviated amino-acid change only when it is
            # empty and the protein HGVS is not one of the placeholder values.
            if HGVS_p not in ["p.?", "p.(=)", "None"]:
                if items[aa_abrev_index] == '':
                    items[aa_abrev_index] = HGVS_p_to_AA_abrev(HGVS_p)
            final_items = list(items[index_to_save])
            final_items.insert(1, genome_coor)
            final_items.append(HGVS_p)
            f_out.write("\t".join(final_items) + "\n")
Exemple #5
0
def main():
    """
    Parse the command line and pass the options on to extractFields.

    Positionals: the vcf path and the output file path.  Optional:
    -n/--negativeIndividuals (a file name) and -f/--fields (one or more
    field names).  The four values are forwarded to extractFields in that
    order.
    """
    cli = argparse.ArgumentParser(
        description='Script that produces sample based excel readable file for annotated variants.')

    # Positional arguments (declared through ``dest`` only).
    cli.add_argument(
        dest='vcf',
        help='Path to the vcf. This file must have been split and normalized before.')
    cli.add_argument(
        dest='output_file',
        help='Path to the output tab separated file.')

    # Optional arguments.
    cli.add_argument(
        '-n', '--negativeIndividuals',
        metavar="neg_indivuals",
        help='File with negative sample names for a given phenotype. Will add column isPositive to the output.')
    cli.add_argument(
        '-f', '--fields',
        nargs='+',
        help='Specific fields to include')

    opts = cli.parse_args()
    extractFields(opts.vcf,
                  opts.output_file,
                  opts.negativeIndividuals,
                  opts.fields)
def main():
    """
    Convert a tab-separated input table into the OUTPUT_COLUMNS layout.

    The input and output files are opened by argparse.  The first input
    line is treated as the header: spaces in column names are replaced by
    underscores and the positions of COLUMNS_TO_SAVE are looked up.  For
    every following line the OMIM id is converted, the cDNA HGVS string is
    converted with convert_HGVS, and one output line is written consisting
    of the saved columns with the genomic coordinate inserted at position 1
    and the protein HGVS appended.

    Side effects: sets the module-level REFGENE from --reference_genome.

    Raises: whatever convert_HGVS raised, when the conversion fails for a
    row whose HGVS field does not contain ';'.
    """
    global REFGENE

    parser = argparse.ArgumentParser()
    parser.add_argument('-i',
                        '--readable_input',
                        type=argparse.FileType('r'),
                        help='Opened readable input file for conversion.')
    parser.add_argument('-o',
                        '--writable_output',
                        type=argparse.FileType('w'),
                        help='Opened writable output file for conversion.')
    parser.add_argument('-g', '--genome_path', help='Link to hg38.fa.')
    parser.add_argument('-r',
                        '--reference_genome',
                        default='./hg38.BRCA.refGene.txt',
                        help='Link to hg38.BRCA.refgene.txt.')

    args = parser.parse_args()
    GENOME = SequenceFileDB(args.genome_path)
    REFGENE = args.reference_genome

    f_out = args.writable_output
    f_in = args.readable_input
    try:
        f_out.write("\t".join(OUTPUT_COLUMNS) + "\n")
        for index, line in enumerate(f_in):
            items = np.array(line.rstrip().split("\t"))
            if index == 0:
                # Handle column names
                columns = np.array([i.replace(" ", "_") for i in items])
                index_to_save = [
                    np.where(columns == i)[0][0] for i in COLUMNS_TO_SAVE
                ]
                column_idx = dict(zip(COLUMNS_TO_SAVE, index_to_save))
                continue
            OMIM_id_index = column_idx["Condition_ID_value"]
            items[OMIM_id_index] = convert_OMIM_id(items[OMIM_id_index])
            hgvs_idx = column_idx["HGVS"]
            HGVS_cDNA = (items[column_idx["Reference_sequence"]] + ":" +
                         items[hgvs_idx])
            try:
                genome_coor, HGVS_p = convert_HGVS(HGVS_cDNA, GENOME)
            except Exception:
                # Only HGVS strings containing ';' get the None filler.
                # Any other failure is re-raised: previously the values of
                # the preceding row were silently reused (or a NameError was
                # raised on the first data row).
                if ";" not in items[hgvs_idx]:
                    raise
                genome_coor, HGVS_p = create_None_filler()
            aa_abrev_index = column_idx["Abbrev_AA_change"]
            # Fill in the abbreviated amino-acid change only when it is
            # empty and the protein HGVS is not one of the placeholder values.
            if HGVS_p not in ["p.?", "p.(=)", "None"]:
                if items[aa_abrev_index] == '':
                    items[aa_abrev_index] = HGVS_p_to_AA_abrev(HGVS_p)
            final_items = list(items[index_to_save])
            final_items.insert(1, genome_coor)
            final_items.append(HGVS_p)
            f_out.write("\t".join(final_items) + "\n")
    finally:
        # Close whatever argparse opened, even when a row raised.  A handle
        # is None when its option was not given on the command line.
        for handle in (f_in, f_out):
            if handle is not None:
                handle.close()
def supply_args():
    """
    Define the command-line interface and return the parsed arguments.

    Options: --input_vcf / --output_vcf, and --chrom / --pos / --ref / --alt,
    plus --version.

    Raises Exception when --input_vcf is given without --output_vcf, or when
    --chrom is given without all of --pos, --ref and --alt.
    """
    parser = argparse.ArgumentParser(description='')

    # (flag, help text) for the plain string options, in registration order.
    plain_options = (
        ('--input_vcf', 'Input VCF.'),
        ('--output_vcf', 'Output VCF.'),
        ('--chrom', 'Chromosome.'),
        ('--pos', 'Position.'),
        ('--ref', 'Refence Allele.'),
        ('--alt', 'Alternate Allele.'),
    )
    for flag, description in plain_options:
        parser.add_argument(flag, help=description)

    parser.add_argument(
        '--version', action='version', version='%(prog)s ' + VERSION)

    parsed = parser.parse_args()

    # An input VCF is only meaningful together with an output VCF.
    if parsed.input_vcf and not parsed.output_vcf:
        raise Exception(
            'Must specify an output VCF if you are specifying an input VCF.')

    # A chromosome needs the remaining three pieces of the variant.
    if parsed.chrom and not all((parsed.pos, parsed.ref, parsed.alt)):
        raise Exception('Must specify each of chrom, pos, ref, and alt.')

    return parsed
def supply_args():
    """
    Populate args.
    https://docs.python.org/2.7/library/argparse.html

    --sam, --outfile and --ref are required.  The region is selected either
    with --target (one of the known regions) or with --coords (a custom
    range); when neither is given, target falls back to 'flt3_e14'.

    Returns the argparse namespace.  When --coords is used, ``target`` is
    None.

    Raises Exception when both --target and --coords are given.
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--sam', required=True, help='Input SAM File')
    parser.add_argument('--outfile', required=True, help='Output File')
    parser.add_argument('--ref',
                        required=True,
                        help='Input Reference Sequence')
    parser.add_argument('--ref_build',
                        choices=['hg19'],
                        default='hg19',
                        help='Which reference build to utilize')
    # No argparse-level default here: with default='flt3_e14' the target was
    # always set, so every use of --coords was rejected as "both specified".
    # The fallback is applied below, after the conflict check.
    parser.add_argument('--target',
                        choices=['flt3', 'flt3_e13', 'flt3_e14', 'flt3_e15'],
                        default=None,
                        help='Region to target')
    parser.add_argument(
        '--coords',
        help='Coordinate range, in the format [chrom:start-stop], 1-based.')
    parser.add_argument('--paired',
                        action='store_true',
                        help='Data is paired-end data.')
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + VERSION)
    args = parser.parse_args()

    if args.target and args.coords:
        raise Exception(
            "You can't specify both a known target region and a custom coordinate at the same time."
        )
    if not args.target and not args.coords:
        # Same effective default as before when nothing is specified.
        args.target = 'flt3_e14'

    return args
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("hotspots_2d",
                        default="../data/hotspots/v2_multi_type_residue.txt",
                        type=str,
                        help="2D cancerhotspots data file")
    parser.add_argument("hotspots_3d",
                        default="../data/hotspots/3d_hotspots.txt",
                        type=str,
                        help="3D cancerhotspots data file")
    parser.add_argument("--removed_hotspots",
                        default=None,
                        type=str,
                        help='Output removed hotspots')
    args = parser.parse_args()

    hotspots_2d = pd.read_csv(args.hotspots_2d, sep="\t")
    hotspots_2d.columns = [
        c.lower().replace("-", "_") for c in hotspots_2d.columns
    ]
    hotspots_2d['type'] = hotspots_2d.indel_size.fillna(0).apply(
        lambda x: "in-frame indel" if x > 0 else "single residue")
    hotspots_2d.loc[((hotspots_2d.type == "single residue")
                     & hotspots_2d.residue.str.contains("X")),
                    'type'] = "splice site"

    hotspots_3d = pd.read_csv(args.hotspots_3d, sep="\t")
    hotspots_3d.columns = [
        c.lower().replace("-", "_") for c in hotspots_3d.columns
    ]
Exemple #10
0
# Command-line interface: five positional file arguments plus tuning options.
parser = optparse.OptionParser(
    usage="usage: %prog [options] input_conf_file prefered_nm_file_conf annotation_gtf reference_fasta_file chr_accesion_file",
    description="Generate an annotation report for a set of table from annovar",
)
parser.add_option(
    "-d", "--hgvs-max-dist",
    type=int,
    default=200,
    help="Maximum distance from start of first exon or end of last exon of an nm to report an hgvs position (default : 200)",
)
parser.add_option(
    "-b", "--uta-database",
    type=str,
    default=None,
    help="The url of uta database to use (ex : postgresql://anonymous:[email protected]/uta_dev/uta_20150704). If not set the default database is used.",
)
parser.add_option(
    "-q", "--mapQ",
    default=0,
    help="-q option for mpileup for frequency processing and base counting. mapQ threshold (default : 0)",
)
parser.add_option(
    "-Q", "--BAQ",
    default=0,
    help="-Q option for mpileup for frequency processing and base counting. base quality threshold (default : 0)",
)
options, args = parser.parse_args()

# All five positional arguments are mandatory; show the help and stop otherwise.
if not len(args) >= 5:
    parser.print_help()
    exit(1)

clinTools.checkSamtoolsVersion('1.1')

# Positional arguments, in the order documented in the usage string.
(inputConfFileName,
 preferedNmFile,
 gtfFileName,
 refFastaFileName,
 chrAccesionFile) = args[:5]

# Resource directories that sit next to this script.
mesScriptDir = os.path.dirname(os.path.realpath(__file__)) + "/maxentscan/"
scoreTablePath = os.path.dirname(os.path.realpath(__file__)) + "/score_tables/"
import hgvs.assemblymapper

# Command-line interface: both directories are mandatory.
parser = argparse.ArgumentParser(
    description='Retrieve civic variant info from CIViC database')
parser.add_argument(
    '-r', '--ref_dir',
    type=str, required=True,
    help='directory for reference files')
parser.add_argument(
    '-o', '--out_dir',
    type=str, required=True,
    help='output directory for parsed files')

# The parsed options are kept as a plain dict.
args = vars(parser.parse_args())
ref_dir, results_dir = args['ref_dir'], args['out_dir']

# The reference directory must already exist; the output one is created
# on demand.
if not os.path.exists(ref_dir):
    print("Please indicate the reference directory")
    sys.exit(1)
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

# Lookup tables read from JSON files in the reference directory.
with open(os.path.join(ref_dir, "fix_names.json")) as f:
    fix_names = json.load(f)

with open(os.path.join(ref_dir, "gdc_variant_types.json")) as f:
    gdc_variant_types = json.load(f)

with open(os.path.join(ref_dir, "fusion_variant_types.json")) as f:
    fusion_variant_types = json.load(f)
Exemple #12
0
def parse_args():
    """
    Description:
        function 'parse_args' builds the parser for the BRCA fill-in script,
        parses the command line and returns the resulting argparse namespace.
        The genome / refseq / datatable inputs are opened for reading by
        argparse, 'outBRCA' is opened for writing, 'artifacts_dir' is kept as
        a string and 'calcProtein' is True only when -p is given (False
        otherwise). Options that are not supplied are left as None.
    """
    parser = argparse.ArgumentParser(
        description='Fill in hg18, hg19 genomic coordinates and cDNA hgvs strings in merged BRCA variant dataset.')

    # Every option as (flags, keyword arguments), in help-output order.
    option_specs = [
        (('-i', '--inBRCA'),
         {'type': argparse.FileType('r'),
          'help': 'Input ENIGMA BRCA datatable file for conversion.'}),
        (('-j', '--inHg18'),
         {'type': argparse.FileType('r'),
          'help': 'Input hg18 reference genome fasta file.'}),
        (('-k', '--inHg19'),
         {'type': argparse.FileType('r'),
          'help': 'Input hg19 reference genome fasta file.'}),
        (('-l', '--inHg38'),
         {'type': argparse.FileType('r'),
          'help': 'Input hg38 reference genome fasta file.'}),
        (('-r', '--inRefSeq18'),
         {'type': argparse.FileType('r'),
          'help': 'Input refseq annotation hg18-based genepred file.'}),
        (('-s', '--inRefSeq19'),
         {'type': argparse.FileType('r'),
          'help': 'Input refseq annotation hg19-based genepred file.'}),
        (('-t', '--inRefSeq38'),
         {'type': argparse.FileType('r'),
          'help': 'Input refseq annotation hg38-based genepred file.'}),
        (('-p', '--calcProtein'),
         {'dest': 'calcProtein',
          'action': 'store_true',
          'help': 'Set flag for hgvs protein fill-in. May not result in complete fill-in.'}),
        (('-o', '--outBRCA'),
         {'type': argparse.FileType('w'),
          'help': 'Output filled in ENIGMA BRCA datatable file.'}),
        (('--artifacts_dir',),
         {'help': 'Artifacts directory with pipeline artifact files.'}),
    ]
    for flags, keywords in option_specs:
        parser.add_argument(*flags, **keywords)

    # Protein fill-in stays off unless -p is passed explicitly.
    parser.set_defaults(calcProtein=False)
    return parser.parse_args()