コード例 #1
0
 def setup(self):
     """Load CIViC variants and index them by lifted-over coordinates.

     Fetches one page (up to 5000 records) from the CIViC variants API,
     lifts each record's start position with the chain file registered
     under ``'hg19'`` in ``constants.liftover_chain_paths`` and stores the
     record in ``self.civicdata`` keyed by ``"chrom:pos:ref:alt"``.

     Records are skipped when the chromosome, start position, reference
     bases or variant bases are missing, or when the position cannot be
     lifted over.
     """
     r = requests.get('https://civicdb.org/api/variants?count=5000&page=1')
     variants = json.loads(r.text)['records']
     lifter = LiftOver(constants.liftover_chain_paths['hg19'])
     vdict = {}
     for variant in variants:
         coordinates = variant['coordinates']
         chrom_37 = coordinates['chromosome']
         pos_37 = coordinates['start']
         if chrom_37 is None or pos_37 is None:
             continue
         new_coords = lifter.convert_coordinate("chr" + chrom_37,
                                                int(pos_37))
         # convert_coordinate returns None when the chromosome is not in
         # the chain file and an empty list when the position has no
         # match; both mean "cannot lift", so test truthiness, not len().
         if not new_coords:
             continue
         chrom_38 = new_coords[0][0].replace('chr', '')
         pos_38 = new_coords[0][1]
         ref = coordinates['reference_bases']
         alt = coordinates['variant_bases']
         if ref is None or alt is None:
             continue
         vkey = ':'.join(map(str, [chrom_38, pos_38, ref, alt]))
         vdict[vkey] = variant
     self.civicdata = vdict
コード例 #2
0
ファイル: bamload.py プロジェクト: alinja/snipsa
def setup_conv(in_build):
    """Set the module-level conversion globals for the given input build.

    Builds 19 and 37 share the 'b37' settings and load the
    GRCh37 -> GRCh38 chain into ``lo_37to38``; they differ only in the
    contig naming ('chrY'/'chrM' versus 'Y'/'MT'). Any other value selects
    the 'b38' settings and loads the GRCh38 -> GRCh37 chain into
    ``lo_38to37``. Only the LiftOver global of the selected branch is
    assigned.
    """
    global b3x, str_db_file, contig, contigmt
    global pos_triplet_fn, lo_37to38, lo_38to37
    print("Loading LiftOver conversion chain file for build %d..." % in_build)
    if in_build in (19, 37):
        b3x = 'b37'
        str_db_file = 'str_hg19.gff3'
        if in_build == 19:
            contig, contigmt = 'chrY', 'chrM'
        else:
            contig, contigmt = 'Y', 'MT'
        pos_triplet_fn = pos_triplet_37
        lo_37to38 = LiftOver('crossmap/GRCh37_to_GRCh38.chain.gz')
        return
    b3x = 'b38'
    str_db_file = 'str_hg38.gff3'
    contig, contigmt = 'chrY', 'chrM'
    pos_triplet_fn = pos_triplet_38
    lo_38to37 = LiftOver('crossmap/GRCh38_to_GRCh37.chain.gz')
コード例 #3
0
    def __init__(self, args):
        """Keep the parsed arguments and prepare an hg19 -> hg38 LiftOver.

        Also creates three empty lists (``lengths_orig``,
        ``lengths_filtered``, ``oldVsNew``); nothing in this method fills
        them.
        """
        self.args = args
        self.doLiftOver = LiftOver('hg19', 'hg38')

        # Three independent empty accumulators.
        self.lengths_orig, self.lengths_filtered, self.oldVsNew = [], [], []
コード例 #4
0
def lift_pos(posvec, chrvec, chainFile):
    """Lift 1-based positions through a chain file.

    :param posvec: 1-based positions; must support ``posvec - 1``
        (assumed to be a NumPy integer array - TODO confirm with callers)
    :param chrvec: integer chromosome numbers, formatted as ``chr<N>``
    :param chainFile: path handed to ``LiftOver``
    :return: ``(pos_lifted, pos_indi, chr_lifted)`` where ``pos_lifted``
        holds 1-based lifted positions (the input position when nothing
        maps), ``pos_indi`` holds one of ``miss``/``multi``/``unchanged``/
        ``lifted`` per SNP and ``chr_lifted`` the target chromosome number
    """
    logging.info("Lifting genomic positions...")
    nsnps = len(posvec)
    # The liftover call is fed 0-based positions; converted back on return.
    posvec = posvec - 1
    pos_lifted = np.empty((nsnps, ), dtype='int32')
    chr_lifted = np.empty((nsnps, ), dtype='int32')
    pos_indi = np.empty((nsnps, ), dtype='|S10')
    lift = LiftOver(chainFile)
    for i in range(nsnps):
        if (i + 1) % 200000 == 0:
            logging.info("{} SNPs done".format(i + 1))
        pos = posvec[i]
        chrom = 'chr%d' % (chrvec[i], )
        hits = lift.convert_coordinate(chrom, pos)
        if not hits:
            # No mapping (None or empty list): keep the input coordinates.
            pos_lifted[i] = pos
            chr_lifted[i] = chrvec[i]
            pos_indi[i] = 'miss'
            continue
        # With several hits the first one is used.
        target_chrom, target_pos = hits[0][0], hits[0][1]
        pos_lifted[i] = target_pos
        chr_lifted[i] = re.sub('chr', '', target_chrom)
        if len(hits) > 1:
            pos_indi[i] = 'multi'
        elif pos == target_pos:
            pos_indi[i] = 'unchanged'
        else:
            pos_indi[i] = 'lifted'
    return pos_lifted + 1, pos_indi, chr_lifted
コード例 #5
0
 def __init__(self, regionsFileName, hg):
     """Load the regions JSON file and prepare a LiftOver if needed.

     :param regionsFileName: path of a JSON file; its parsed content is
         stored in ``self.regionsDict``
     :param hg: assembly name of the input; unless it is ``'hg38'`` a
         ``LiftOver(hg, 'hg38')`` is stored in ``self.lo``, otherwise
         ``self.lo`` is ``None``
     """
     # The with-block closes the file; no explicit close() is needed.
     with open(regionsFileName, 'r') as f:
         self.regionsDict = json.load(f)
     self.lo = None
     if hg != 'hg38':
         self.lo = LiftOver(hg, 'hg38')
コード例 #6
0
def liftover_to_19(loc, build):
    """Lift a ``"chrom:pos"`` location through the chain file for *build*.

    :param loc: location string; the first two ``:``-separated fields are
        used as chromosome and position
    :param build: key looked up in ``chains`` to select the chain file
        under ``chainpath``
    :return: the lifted position of the first hit as ``int``, or ``NaN``
        when the location does not lift over
    """
    fields = loc.split(':')
    chrom, pos = fields[0], fields[1]
    lo = LiftOver(os.path.join(chainpath, chains.get(build)))
    # pyliftover compares the position numerically, so it must be an int,
    # not the raw string taken from the location.
    con_pos = lo.convert_coordinate(chrom, int(pos))
    if con_pos:
        return int(con_pos[0][1])
    return NaN
コード例 #7
0
 def setup(self):
     """Load all CIViC variant pages and index them by lifted coordinates.

     Follows the API's ``_meta.links.next`` link until it is ``None``.
     Each record's start position is lifted with the chain file
     registered under ``'hg19'`` in ``constants.liftover_chain_paths``
     and the record is stored in ``self.civicdata`` keyed by
     ``"chrom:pos:ref:alt"``. Records with missing coordinates or
     alleles, or that cannot be lifted, are skipped.

     A connection failure or timeout stops the load early; the data
     gathered so far is kept and an error is printed and logged.
     """
     self.civicdata = {}
     lifter = LiftOver(constants.liftover_chain_paths['hg19'])
     page_url = 'https://civicdb.org/api/variants?count=500&page=1'
     while page_url is not None:
         try:
             r = requests.get(page_url, timeout=5)
         except (requests.exceptions.ConnectionError,
                 requests.exceptions.Timeout):
             # timeout=5 can raise a read timeout, which is not a
             # ConnectionError; treat it as the same incomplete load.
             msg = 'ERROR: Incomplete CIVIC data load'
             print(msg)
             self.logger.error(msg)
             break
         d = json.loads(r.text)
         records = d['records']
         page_url = d['_meta']['links']['next']
         for variant in records:
             coordinates = variant['coordinates']
             chrom_37 = coordinates['chromosome']
             pos_37 = coordinates['start']
             if chrom_37 is None or pos_37 is None:
                 continue
             new_coords = lifter.convert_coordinate("chr" + chrom_37,
                                                    int(pos_37))
             # None (chromosome not in chain file) and [] (no match)
             # both mean the position cannot be lifted.
             if not new_coords:
                 continue
             chrom_38 = new_coords[0][0].replace('chr', '')
             pos_38 = new_coords[0][1]
             ref = coordinates['reference_bases']
             alt = coordinates['variant_bases']
             if ref is None or alt is None:
                 continue
             vkey = ':'.join(map(str, [chrom_38, pos_38, ref, alt]))
             self.civicdata[vkey] = variant
コード例 #8
0
ファイル: Seq.py プロジェクト: noahpieta/IPyRSSA
 def __init__(self, from_db, to_db):
     """
     Initialise the pyliftover base object for one assembly pair.

     from_db         -- source assembly: 'hg19','hg38','mm9','mm10'
     to_db           -- target assembly: 'hg19','hg38','mm9','mm10'
     """
     # Imported here so pyliftover is only required when an instance is built.
     from pyliftover import LiftOver as _PyLiftOver

     _PyLiftOver.__init__(self, from_db=from_db, to_db=to_db)
コード例 #9
0
ファイル: cli.py プロジェクト: harrispopgen/mutyper
def ancestral_fasta(args):
    """Subroutine for the ancestor subcommand.

    Copies ``args.reference`` to ``args.output`` and edits the copy in
    place: regions outside the optional BED mask (``args.bed``, ``'-'``
    for stdin) become ``N``; variants in ``args.vcf`` that are not
    biallelic SNPs become ``N``; biallelic SNPs are polarized against the
    outgroup base found by lifting the site through ``args.chain`` into
    ``args.outgroup``.

    Raises:
        ValueError: a variant's REF allele disagrees with the reference
            sequence.
    """
    # single chromosome fasta file for reference genome
    ref = pyfaidx.Fasta(args.reference, read_ahead=10000)
    # make a copy to build our ancestor for this chromosome
    copyfile(args.reference, args.output)
    anc = pyfaidx.Fasta(args.output, read_ahead=10000, mutable=True)
    # reference genome for outgroup species (all chromosomes)
    out = pyfaidx.Fasta(args.outgroup, read_ahead=10000)
    # outgroup to reference alignment chain file
    lo = LiftOver(args.chain)
    # snps database for the same chromosome
    vcf = cyvcf2.VCF(args.vcf)

    # change regions outside of callability mask to all N bases
    if args.bed:
        bed = sys.stdin if args.bed == '-' else open(args.bed, 'r')
        try:
            last_end = 0
            for line in bed:
                chrom, start, end = line.rstrip().split('\t')[:3]
                start = int(start)
                anc[chrom][last_end:start] = 'N' * (start - last_end)
                last_end = int(end)
            # NOTE(review): uses the chromosome of the last BED line, so
            # an empty BED input fails here because `chrom` is unbound.
            anc[chrom][last_end:len(anc[chrom])] = 'N' * (len(anc[chrom]) -
                                                          last_end)
        finally:
            # Close the mask file we opened ourselves; leave stdin alone.
            if bed is not sys.stdin:
                bed.close()

    for variant in vcf:
        # change variants that are not biallelic SNPs to N bases
        if not (variant.is_snp and len(variant.ALT) == 1):
            anc[variant.CHROM][variant.start:variant.end] = 'N' * (
                variant.end - variant.start)
        else:
            out_coords = lo.convert_coordinate(variant.CHROM, variant.start)
            # change ambiguously aligning sites to N bases
            if out_coords is None or len(out_coords) != 1:
                anc[variant.CHROM][variant.start] = 'N'
            else:
                if variant.REF != ref[variant.CHROM][
                        variant.start].seq.upper():
                    raise ValueError(f'variant reference allele {variant.REF} '
                                     f'mismatches reference sequence '
                                     f'{ref[variant.CHROM][variant.start]}')
                out_chromosome, out_position, out_strand = out_coords[0][:3]
                out_allele = out[out_chromosome][out_position].seq
                # if negative strand, take reverse complement base
                if out_strand == '-':
                    out_allele = reverse_complement(out_allele)
                # and finally, polarize
                if out_allele.upper() == variant.ALT[0]:
                    anc[variant.CHROM][variant.start] = out_allele
                elif out_allele.upper() != variant.REF:
                    # triallelic
                    anc[variant.CHROM][variant.start] = 'N'
コード例 #10
0
def main():
    """Lift intron features in a TSV through a chain file and print them.

    Usage: ``<script> introns.tsv chain.gz``. The first column of every
    data row is an intron written as ``chrom:lend-rend`` (1-based). Both
    ends are lifted; rows where either end fails to lift are dropped
    silently, rows whose ends land on a different chromosome are reported
    on stderr and dropped. The header line and all converted rows are
    written to stdout.
    """

    usage = "\n\n\tusage: {} cancer_introns.b38.annot_ready.tsv hg38ToHg19.over.chain.gz > cancer_introns.b37.annot_ready.tsv\n\n".format(
        sys.argv[0])

    if len(sys.argv) < 3:
        print(usage, file=sys.stderr)
        sys.exit(1)

    cancer_introns_file = sys.argv[1]
    hg_chain_file = sys.argv[2]

    # Use the chain file given on the command line, not a fixed
    # file name relative to the working directory.
    lo = LiftOver(hg_chain_file)

    with open(cancer_introns_file, 'rt') as fh:
        header = next(fh)
        header = header.rstrip()
        print(header)
        for line in fh:
            line = line.rstrip()
            vals = line.split("\t")
            intron = vals[0]
            chrom, coordset = intron.split(":")
            (lend, rend) = coordset.split("-")
            lend = int(lend)
            rend = int(rend)

            # Lift with 0-based positions, convert back to 1-based below.
            new_lend = lo.convert_coordinate(chrom, lend - 1)
            new_rend = lo.convert_coordinate(chrom, rend - 1)
            if new_lend and new_rend:

                new_lend_chr = new_lend[0][0]
                new_lend_coord = new_lend[0][1] + 1

                new_rend_chr = new_rend[0][0]
                new_rend_coord = new_rend[0][1] + 1

                if new_lend_chr != new_rend_chr or new_lend_chr != chrom:
                    sys.stderr.write("-failed conversion of {}".format(line) +
                                     "  --> {} {}, {} {}\n".format(
                                         new_lend_chr, new_lend_coord,
                                         new_rend_chr, new_rend_coord))
                    continue

                # Keep lend <= rend even if the lifted ends swapped order.
                if new_lend_coord > new_rend_coord:
                    (new_lend_coord, new_rend_coord) = (new_rend_coord,
                                                        new_lend_coord)

                new_intron_feature = "{}:{}-{}".format(chrom, new_lend_coord,
                                                       new_rend_coord)
                vals[0] = new_intron_feature
                print("\t".join(vals))

    sys.exit(0)
コード例 #11
0
def _parse_info_end(info):
    """Return the integer value of the exact ``END`` key in an INFO string, or None."""
    for field in info.split(';'):
        # Match the key exactly so CIEND=/SVEND= are not mistaken for END=.
        if field.startswith('END='):
            try:
                return int(field[len('END='):])
            except ValueError:
                return None
    return None


def main(args):
    """Annotate a VCF with lifted-over coordinates and bgzip/tabix the result.

    Reads ``args['inputfile']``, lifts every variant's CHROM/POS (and the
    INFO ``END`` value when present) through ``args['chainfile']`` and
    adds ``hg19_chr``, ``hg19_pos`` and ``hg19_end`` INFO tags where a
    lifted position exists. Every variant is written to
    ``args['outputfile']``, which is then compressed with ``bgzip`` and
    indexed with ``tabix``.
    """
    # open input vcf
    vcf = vcf_parser.Vcf(args['inputfile'])
    # add 3 new tag definitions - for hg19 liftover: chr, pos, and end
    hg19CHROM_definition = '##INFO=<ID=hg19_chr,Number=1,Type=String,Description="CHROM in hg19 using LiftOver from pyliftover">'
    hg19POS_definition = '##INFO=<ID=hg19_pos,Number=1,Type=Integer,Description="POS in hg19 using LiftOver from pyliftover (converted back to 1-based)">'
    hg19END_definition = '##INFO=<ID=hg19_end,Number=1,Type=Integer,Description="END in hg19 using LiftOver from pyliftover (converted back to 1-based)">'
    vcf.header.add_tag_definition(hg19END_definition)
    vcf.header.add_tag_definition(hg19POS_definition)
    vcf.header.add_tag_definition(hg19CHROM_definition)

    # get chain file for liftover
    lo = LiftOver(args['chainfile'])

    # write header and then loop variants, adding liftover coordinates to
    # INFO fields when appropriate. write all variants.
    with open(args['outputfile'], 'w') as fo:
        vcf.write_header(fo)
        for vnt_obj in vcf.parse_variants():

            # generate hg19 LO coordinates based on CHROM and POS
            # (0-based for the liftover call, back to 1-based in the tag)
            hits = lo.convert_coordinate(vnt_obj.CHROM, vnt_obj.POS-1)
            # hits is None when CHROM is not in the chain file
            if hits:
                vnt_obj.add_tag_info('hg19_chr='+hits[0][0].split('chr')[1])
                vnt_obj.add_tag_info('hg19_pos='+str(hits[0][1]+1))

            # also want to incorporate END position for SV and CNV
            END = _parse_info_end(vnt_obj.INFO)
            if END is not None:
                hits_end = lo.convert_coordinate(vnt_obj.CHROM, END-1)
                if hits_end:
                    try:
                        # if hg19_chr is already defined, don't add it
                        vnt_obj.get_tag_value("hg19_chr")
                    except Exception:
                        # The lookup is expected to raise when the tag is
                        # missing; its exception type is project-defined.
                        vnt_obj.add_tag_info(
                            'hg19_chr='+hits_end[0][0].split('chr')[1])
                    vnt_obj.add_tag_info('hg19_end='+str(hits_end[0][1]+1))
            vcf.write_variant(fo, vnt_obj)

    subprocess.run(["bgzip", args['outputfile']])
    subprocess.run(["tabix",args['outputfile']+".gz"])
コード例 #12
0
def try_find_build(rs, pos):
    """Print, per SNP, its position in several builds next to the source position.

    For every record returned by ``fetch_snps(rs)`` the hg38 position is
    lifted to hg19 and from there to hg18 and hg17. Each printed
    coordinate is followed by ``*`` when it equals the corresponding
    entry of *pos*, which helps to guess the build *pos* was given in.
    SNPs that cannot be lifted are skipped with a warning.

    :param rs: identifiers passed to ``fetch_snps``
    :param pos: 1-based source positions, parallel to the fetched records
    """
    # Records look like ('rs3737728', 'GRCh38.p2', '1', '1086035').
    snps_info = fetch_snps(rs)
    logging.info("Loading liftover chain files...")
    lift38_19 = LiftOver('pyliftover/hg38ToHg19.over.chain.gz')
    lift19_18 = LiftOver('pyliftover/hg19ToHg18.over.chain.gz')
    lift19_17 = LiftOver('pyliftover/hg19ToHg17.over.chain.gz')
    logging.info("Done")

    for (rsId, build, true_chr, pos_hg38), source_pos in zip(snps_info, pos):
        chrom = 'chr{}'.format(true_chr)
        try:
            # Compare everything in 0-based coordinates.
            source_pos -= 1
            hg38_zero_based = int(pos_hg38) - 1
            pos_hg19 = lift38_19.convert_coordinate(chrom,
                                                    hg38_zero_based)[0][1]
            pos_hg18 = lift19_18.convert_coordinate(chrom, pos_hg19)[0][1]
            pos_hg17 = lift19_17.convert_coordinate(chrom, pos_hg19)[0][1]
        except (TypeError, IndexError, ValueError) as err:
            # None/empty liftover result or a non-numeric position.
            logging.warning("Skipping %s: %s", rsId, err)
            continue
        print(
            "build={} {} chr{} source={} hg38={}{} hg19={}{} hg18={}{} hg17={}{}"
            .format(build, rsId, true_chr, source_pos, pos_hg38,
                    # pos_hg38 is a string; compare its 0-based integer
                    # value, otherwise the marker could never be set.
                    '*' if hg38_zero_based == source_pos else '', pos_hg19,
                    '*' if pos_hg19 == source_pos else '', pos_hg18,
                    '*' if pos_hg18 == source_pos else '', pos_hg17,
                    '*' if pos_hg17 == source_pos else ''))
コード例 #13
0
    def liftover(self):
        """Replace this object's chromosome and position with their lifted values.

        Lifts ``self.chromosome``/``self.position`` from hg38 to
        ``self.build`` and overwrites both attributes with the first hit.
        """
        # TODO: the failure mode of the liftover tool is not handled here;
        # a try/except will probably be needed eventually.
        # Changing chromosome and position also invalidates the key. That
        # could be fixed, but the ref and alt alleles are not at hand and
        # would have to be parsed out of chromosomeHgvsName.

        from pyliftover import LiftOver

        converter = LiftOver('hg38', self.build)
        first_hit = converter.convert_coordinate(self.chromosome,
                                                 self.position)[0]
        self.chromosome = first_hit[0]
        self.position = first_hit[1]
コード例 #14
0
def from_hg18_to_hg19(chr, coord):
    """
    Convert one hg18 coordinate to hg19.

    LiftOver coordinates are 0-based; add +1 to obtain a value in
    1-based coordinates.
    NOTE(review): the code adds 1 to the *input* before lifting and
    returns the lifted value unchanged - confirm this matches the
    coordinate convention the callers expect.

    :param chr: chromosome name, e.g. 'chr6'
    :param coord: integer, e.g. 10000
    :return: position of the first hit in hg19
    """
    converter = LiftOver('hg18', 'hg19')
    hits = converter.convert_coordinate(chr, 1 + int(coord))
    first_hit = hits[0]
    return first_hit[1]
コード例 #15
0
 def _parse_cmd_args(self, args):
     """ Parse the converter's command-line arguments and store them on self.

     Sets the input paths and directory, the output directory (created
     if missing), the output base file name, the input assembly, the
     optional LiftOver object and the status file path. ``input_format``
     is only set when ``-f`` was given.
     """
     parser = argparse.ArgumentParser()
     parser.add_argument('path',
                         help="Path to this converter's python module")
     parser.add_argument('inputs', nargs='+',
                         help='Files to be converted to .crv')
     parser.add_argument('-f', dest='format',
                         help='Specify an input format')
     parser.add_argument('-n', '--name', dest='name',
                         help='Name of job. Default is input file name.')
     parser.add_argument(
         '-d', '--output-dir', dest='output_dir',
         help='Output directory. Default is input file directory.')
     assemblies = ['hg38'] + list(constants.liftover_chain_paths.keys())
     parser.add_argument(
         '-l', '--liftover', dest='liftover',
         choices=assemblies, default='hg38',
         help='Input gene assembly. Will be lifted over to hg38')
     opts = parser.parse_args(args)

     self.input_paths = [os.path.abspath(path) for path in opts.inputs]
     if opts.format:
         self.input_format = opts.format
     # Directory and default job name both come from the first input file.
     first_input = self.input_paths[0]
     self.input_dir = os.path.dirname(first_input)
     self.output_dir = opts.output_dir if opts.output_dir else self.input_dir
     if not os.path.exists(self.output_dir):
         os.makedirs(self.output_dir)
     self.output_base_fname = (opts.name if opts.name
                               else os.path.basename(first_input))
     self.input_assembly = opts.liftover
     # hg38 input needs no conversion, so no chain file is loaded for it.
     self.do_liftover = self.input_assembly != 'hg38'
     self.lifter = (
         LiftOver(constants.liftover_chain_paths[self.input_assembly])
         if self.do_liftover else None)
     self.status_fpath = os.path.join(
         self.output_dir, self.output_base_fname + '.status.json')
コード例 #16
0
def pyliftover(hg38_chrom, hg38_coord):
    """Return the cached hg19 location for an hg38 coordinate.

    Results are memoised in the module-level ``pyliftover_dict`` under the
    key ``"chrom:coord"``.

    :param hg38_chrom: chromosome name as used in the chain file
    :param hg38_coord: position (cast to ``int`` for the lookup)
    :return: dict with ``'chrom'`` and ``'coord'`` (coordinate as ``str``)
    :raises KeyError: the coordinate cannot be lifted over
    """
    hg38_key = '%s:%s' % (hg38_chrom, hg38_coord)

    if hg38_key not in pyliftover_dict:
        # NOTE(review): the chain file is re-read on every cache miss;
        # consider building the LiftOver once at module level.
        lo = LiftOver(config.input_dir + 'hg38ToHg19.over.chain.gz')
        result = lo.convert_coordinate(hg38_chrom, int(hg38_coord))

        # None means unknown chromosome, [] means no match; indexing an
        # empty list used to fail with IndexError.
        if not result:
            raise KeyError(hg38_key)

        coords_list = result[0]
        pyliftover_dict[hg38_key] = {
            'chrom': coords_list[0],
            'coord': str(coords_list[1])
        }

    return pyliftover_dict[hg38_key]
コード例 #17
0
def create_lo(input_version, output_version):
    """Bundle a LiftOver converter with the assembly names it was built for.

    :return: dict with keys ``input_version``, ``output_version`` and
        ``lo`` (the ``LiftOver`` instance)
    """
    converter = LiftOver(input_version, output_version)
    return dict(
        input_version=input_version,
        output_version=output_version,
        lo=converter,
    )
コード例 #18
0
 def hgVersionJudge(self, nowVersion):
     """Return a LiftOver from ``hg<nowVersion>`` to hg19, or 0 if already hg19.

     :param nowVersion: assembly number, anything ``int()`` accepts
     """
     # Version 19 needs no conversion; 0 is the "nothing to do" marker.
     if int(nowVersion) == 19:
         return 0
     source_assembly = 'hg' + str(nowVersion)
     return LiftOver(source_assembly, 'hg19')
コード例 #19
0
def get_schic_contacts(filename):
    """Load cis chrX contacts from a whitespace-delimited contact file.

    Keeps rows whose columns 0 and 2 are both ``'chrX'``, takes columns 1
    and 3 as integer positions and returns the pairs that fall inside
    ``[coords_min, coords_max]``.

    :raises ValueError: a position lifts over to more than one mm9 locus
    """
    all_contacts = np.loadtxt(filename, dtype=str)

    # filter for cis chrX contacts
    is_cis_chrx = (all_contacts[:, 0] == 'chrX') & (all_contacts[:, 2] == 'chrX')
    contacts = all_contacts[is_cis_chrx]
    contacts = contacts[:, (1, 3)].astype(int)

    # lift over all contacts from mm10 to mm9
    lo = LiftOver('mm10', 'mm9')

    def do_lift(loc):
        """Return the unique mm9 position for *loc*, or None if unmapped."""
        lifted_loc = lo.convert_coordinate('chrX', loc)
        if not lifted_loc:
            print("Locus {} not in mm9 assembly".format(loc))
            return None
        if len(lifted_loc) > 1:
            # A bare string cannot be raised; use a real exception.
            raise ValueError("Non-unique liftover result")
        return lifted_loc[0][1]

    lifted_starts = [do_lift(loc) for loc in contacts[:, 0]]
    lifted_ends = [do_lift(loc) for loc in contacts[:, 1]]
    # NOTE(review): the lifted coordinates are computed but never used;
    # the filter and the return value below still work on the un-lifted
    # positions. Confirm whether `lifted_contacts` was meant to be
    # filtered and returned instead.
    lifted_contacts = np.array(list(zip(lifted_starts, lifted_ends)))

    # keep only contacts in genomic region of interest
    contacts = contacts[(contacts[:, 0] >= coords_min)
                        & (contacts[:, 1] <= coords_max)]

    return contacts
コード例 #20
0
class Converter:
    """Lift 1-based hg19 positions over to hg38."""

    def __init__(self):
        self.lo = LiftOver('hg19', 'hg38')

    def hg38(self, ch, pos):
        """Return the 1-based hg38 position for a 1-based hg19 position.

        :param ch: chromosome as number, 'X'/'Y' or 'chr'-prefixed name
            (case-insensitive)
        :param pos: 1-based position
        :return: ``None`` when nothing maps or the lookup fails, the
            lifted position for a unique hit, or ``(position, hits)``
            with the full hit list when several loci match
        """
        ch = str(ch).upper()
        if ch.isdigit() or ch in ('X', 'Y'):
            ch = "chr{}".format(ch)
        elif ch.startswith('CHR'):
            # Upper-casing turned 'chr1' into 'CHR1', which the chain
            # file would not contain; restore the lower-case prefix.
            ch = 'chr' + ch[3:]
        try:
            # The liftover call takes 0-based positions.
            coord = self.lo.convert_coordinate(ch, pos - 1)
        except Exception:
            print("WARNING: HG38 conversion at {}:{}".format(ch, pos))
            return None
        # None (unknown chromosome) and [] (no match) both mean "no hit".
        if not coord:
            return None
        r = coord[0][1] + 1
        if len(coord) == 1:
            return r
        return r, coord

    def close(self):
        """No resources to release; present for interface compatibility."""
        return
コード例 #21
0
ファイル: vcf.py プロジェクト: zjwang6/jcvi
    def __init__(self, chainfile):
        """
        This object will perform unique single positional liftovers - it will only lift over chromosome positions that
        map uniquely to the new genome and if the strand hasn't changed.
        Note: You should run a VCF Normalization sweep on all lifted-over CPRAs to check for variants that need to be
        re-normalized, and to remove variants where the REF now doesn't match after a liftover.
        The combination of these steps will ensure high quality liftovers. However, it should be noted that this won't
        prevent the situation where multiple positions in the old genome pile up uniquely in the new genome, so one
        needs to check for this.
        It's organised as an object rather than a collection of functions so that the LiftOver chainfile
        only gets opened/parsed once and not for every position to be lifted over.
        :param chainfile: A string containing the path to the local UCSC .gzipped chainfile
        :return:
        """

        # Load the chain file once; the instance is kept on self.liftover.
        self.liftover = LiftOver(chainfile)
コード例 #22
0
async def live_annotate(input_data, annotators):
    """Map one variant and run the live annotator modules on it.

    If ``input_data['assembly']`` (default ``'hg38'``) has a chain file
    registered, the variant is lifted over first and ``chrom``, ``pos``,
    ``ref`` and ``alt`` in *input_data* are overwritten. The mapped record
    is then passed to every module in ``modules_to_run_ordered`` (limited
    to *annotators* when that is not ``None``).

    :return: dict of module name -> annotation (``None`` when a module
        produced nothing or raised) plus the mapped record under ``'crx'``
    """
    from cravat.constants import mapping_parser_name, all_mappings_col_name
    from cravat.inout import AllMappingsParser
    global live_modules, live_mapper, module_confs, modules_to_run_ordered
    response = {}
    assembly = input_data.get('assembly', 'hg38')
    if assembly in cravat.constants.liftover_chain_paths:
        lifter = LiftOver(cravat.constants.liftover_chain_paths[assembly])
        (input_data['chrom'], input_data['pos'],
         input_data['ref'], input_data['alt']) = liftover(input_data, lifter)
    crx_data = live_mapper.live_report_substitute(live_mapper.map(input_data))
    crx_data[mapping_parser_name] = AllMappingsParser(
        crx_data[all_mappings_col_name])
    for module_name in modules_to_run_ordered:
        module = live_modules[module_name]
        if annotators is not None and module_name not in annotators:
            continue
        try:
            conf = module_confs[module_name]
            # Columns flagged as tables hold JSON text that is decoded below.
            json_colnames = [
                col['name'] for col in conf['output_columns']
                if 'table' in col and col['table'] == True
            ]
            annotate_kwargs = {'input_data': crx_data}
            if 'secondary_inputs' in conf:
                # Secondary inputs are results of modules that ran earlier.
                annotate_kwargs['secondary_data'] = {
                    sec_mod: [response[sec_mod]]
                    for sec_mod in conf['secondary_inputs']
                }
            annot_data = module.annotate(**annotate_kwargs)
            annot_data = module.live_report_substitute(annot_data)
            if annot_data == '' or annot_data == {}:
                annot_data = None
            elif type(annot_data) is dict:
                annot_data = clean_annot_dict(annot_data)
            if annot_data is not None:
                for colname in json_colnames:
                    json_data = annot_data.get(colname)
                    if json_data is not None and type(json_data) is str:
                        json_data = json.loads(json_data)
                    annot_data[colname] = json_data
            response[module_name] = annot_data
        except Exception:
            # One failing module must not abort the others.
            import traceback
            traceback.print_exc()
            response[module_name] = None
    del crx_data[mapping_parser_name]
    set_crx_canonical(crx_data)
    response['crx'] = crx_data
    return response
コード例 #23
0
class UniqueLiftover(object):
    def __init__(self, chainfile):
        """
        This object will perform unique single positional liftovers - it will only lift over chromosome positions that
        map uniquely to the new genome and if the strand hasn't changed.
        Note: You should run a VCF Normalization sweep on all lifted-over CPRAs to check for variants that need to be
        re-normalized, and to remove variants where the REF now doesn't match after a liftover.
        The combination of these steps will ensure high quality liftovers. However, it should be noted that this won't
        prevent the situation where multiple positions in the old genome pile up uniquely in the new genome, so one
        needs to check for this.
        It's organised as an object rather than a collection of functions so that the LiftOver chainfile
        only gets opened/parsed once and not for every position to be lifted over.
        :param chainfile: A string containing the path to the local UCSC .gzipped chainfile
        :return:
        """

        self.liftover = LiftOver(chainfile)

    def liftover_cpra(self, chromosome, position, verbose=False):
        """
        Given chromosome, position in 1-based co-ordinates,
        This will use pyliftover to liftover a CPRA and return a (c,p) tuple, or (None, None) if no unique
        and strand maintaining liftover is possible
        :param chromosome: string with the chromosome as it's represented in the from_genome
        :param position: position on chromosome (will be cast to int)
        :param verbose: log the reason for a failed liftover at error level
        :return: ((str) chromosome, (int) position) or (None, None) if no liftover
        """

        chromosome = str(chromosome)
        position = int(position)

        # Perform the liftover lookup, shift the position by 1 as pyliftover deals in 0-based co-ords
        new = self.liftover.convert_coordinate(chromosome, position - 1)

        if new is None:
            # new is None when the chromosome doesn't exist in the chainfile
            failure = "Chromosome '{}' provided not in chain file".format(
                chromosome)
        elif not new:
            # Empty result: this case used to leave the message unbound and
            # crash when verbose was set.
            failure = "{},{} does not lift over to any position".format(
                chromosome, position)
        elif len(new) != 1:
            failure = "{},{} lifts over to multiple positions: {}".format(
                chromosome, position, new)
        elif new[0][2] != "+":
            failure = "{},{} has a flipped strand in liftover: {}".format(
                chromosome, position, new)
        else:
            # Unique, strand-preserving hit: shift the position forward by
            # one to convert back to 1-based co-ords
            return str(new[0][0]), int(new[0][1]) + 1

        if verbose:
            logging.error(failure)
        return None, None
コード例 #24
0
def liftover(pos, chro, from_assembly, to_assembly):
    """
        Lift a single coordinate from one assembly to another with pyliftover.

        When both assemblies are equal, `pos` is returned untouched (no
        cast). Otherwise the position of the first liftover hit is
        returned.

        NOTE:   pyLiftover uses base 0, whereas coordinate system uses base 1
                therefore position 27107251 is actually 27107250 in pyLiftover
        NOTE(review): no 0/1-based shift is applied here - confirm that is intended.
        """
    if from_assembly != to_assembly:
        ucsc_name = 'chr' + str(chro)
        position = int(pos)
        converter = LiftOver(from_assembly, to_assembly)
        hits = converter.convert_coordinate(ucsc_name, position)
        return hits[0][1]
    return pos
コード例 #25
0
    def liftover(self, chromosome, position, build='hg19'):
        """Lift an hg38 location to *build* and return the first hit.

        :return: ``(new_chromosome, new_position)``; also printed when
            ``self.debug`` is set
        """
        # TODO: the failure mode of the liftover tool is not handled here;
        # a try/except will probably be needed eventually.
        # Changing chromosome and position also invalidates the key. That
        # could be fixed, but the ref and alt alleles are not at hand and
        # would have to be parsed out of chromosomeHgvsName.

        hits = LiftOver('hg38', build).convert_coordinate(chromosome, position)
        new_chromosome, new_position = hits[0][0], hits[0][1]

        if self.debug:
            print("%s %s -> %s %s" % (chromosome, position, new_chromosome, new_position))

        return new_chromosome, new_position
コード例 #26
0
ファイル: map_refs.py プロジェクト: drmrgd/biofx_utils
def main(coords, orig_assembly, new_assembly, chainfile, outfh):
    """Lift every ``chrom:pos`` coordinate and hand the rows to print_results.

    Each result row is ``(chrom, pos)`` followed by the fields of the
    first liftover hit. Any failure writes the offending coordinate to
    stderr and re-raises. *chainfile* is accepted but not used here.
    """
    # Create a LiftOver object with desired mapping.
    lo = LiftOver(orig_assembly, new_assembly)

    results = []
    for coord in coords:
        try:
            chrom, pos = coord.split(':')
            # The position has to be passed as an int, not a str.
            first_hit = lo.convert_coordinate(chrom, int(pos))[0]
            row = (chrom, pos) + first_hit
        except BaseException:
            # The possible errors are not enumerated; a locus that cannot
            # be lifted presumably yields None/empty and ends up here.
            # Report the coordinate and propagate.
            sys.stderr.write('Offending coord: %s' % coord)
            raise
        results.append(row)

    print_results(results, outfh)
コード例 #27
0
ファイル: vcf.py プロジェクト: Hensonmw/jcvi
class UniqueLiftover(object):

    def __init__(self, chainfile):
        """
        This object will perform unique single positional liftovers - it will only lift over chromosome positions that
        map uniquely to the new genome and if the strand hasn't changed.
        Note: You should run a VCF Normalization sweep on all lifted-over CPRAs to check for variants that need to be
        re-normalized, and to remove variants where the REF now doesn't match after a liftover.
        The combination of these steps will ensure high quality liftovers. However, it should be noted that this won't
        prevent the situation where multiple positions in the old genome pile up uniquely in the new genome, so one
        needs to check for this.
        It's organised as an object rather than a collection of functions so that the LiftOver chainfile
        only gets opened/parsed once and not for every position to be lifted over.
        :param chainfile: A string containing the path to the local UCSC .gzipped chainfile
        :return:
        """

        self.liftover = LiftOver(chainfile)

    def liftover_cpra(self, chromosome, position, verbose=False):
        """
        Given chromosome, position in 1-based co-ordinates,
        This will use pyliftover to liftover a CPRA and return a (c,p) tuple, or (None, None) if no unique
        and strand maintaining liftover is possible
        :param chromosome: string with the chromosome as it's represented in the from_genome
        :param position: position on chromosome (will be cast to int)
        :param verbose: log the reason for a failed liftover at error level
        :return: ((str) chromosome, (int) position) or (None, None) if no liftover
        """

        chromosome = str(chromosome)
        position = int(position)

        # Perform the liftover lookup, shift the position by 1 as pyliftover deals in 0-based co-ords
        new = self.liftover.convert_coordinate(chromosome, position - 1)

        if new is None:
            # new is None when the chromosome doesn't exist in the chainfile
            failure = "Chromosome '{}' provided not in chain file".format(chromosome)
        elif not new:
            # Empty result: this case used to leave the message unbound and
            # crash when verbose was set.
            failure = "{},{} does not lift over to any position".format(chromosome, position)
        elif len(new) != 1:
            failure = "{},{} lifts over to multiple positions: {}".format(chromosome, position, new)
        elif new[0][2] != "+":
            failure = "{},{} has a flipped strand in liftover: {}".format(chromosome, position, new)
        else:
            # Unique, strand-preserving hit: shift the position forward by
            # one to convert back to 1-based co-ords
            return str(new[0][0]), int(new[0][1]) + 1

        if verbose:
            logging.error(failure)
        return None, None
コード例 #28
0
def main():
    """
    Lift gold-standard sentinel variants over to all assemblies and write them out as JSON.

    Reads the command line via parse_args(), loads the records matching args.input_pattern, fills in
    the missing assembly coordinates of each record's sentinel variant, records the highest evidence
    confidence per record and dumps the resulting list to args.output.

    :return: 0 on completion
    """

    # Parse args
    args = parse_args()
    confidence_orders = ['High', 'Medium', 'Low']  # Used to sort "highest" confidence

    # Load gold-standards
    gold_standards = load_gold_standards(args.input_pattern)

    # Create liftOver instances from chain files (the path arguments are replaced by the objects)
    if args.grch37_to_38:
        args.grch37_to_38 = LiftOver(args.grch37_to_38)
    if args.grch38_to_37:
        args.grch38_to_37 = LiftOver(args.grch38_to_37)

    # Iterate over and process records
    out_data = []
    for record in gold_standards:

        # Lift-over positions to all assemblies
        record['sentinel_variant'] = fill_in_assemblies(
            record['sentinel_variant'],
            args.grch37_to_38,
            args.grch38_to_37
        )

        # Extract highest confidence
        record['gold_standard_info']['highest_confidence'] = sorted(
            [entry['confidence'] for entry in
             record['gold_standard_info']['evidence']],
            key=confidence_orders.index
        )[0]

        out_data.append(record)

    # Write output. A bare file name has an empty dirname, and os.makedirs('') raises,
    # so the directory is only created when there is one to create.
    out_dir = os.path.dirname(args.output)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(args.output, 'w') as out_h:
        json.dump(out_data, out_h, ensure_ascii=False, indent=2)

    return 0
コード例 #29
0
def get_liftover(frm=19, to=38):
    """
    Build a pyliftover LiftOver from a locally stored UCSC chain file.

    The chain file name is 'hg<frm>ToHg<to>.over.chain.gz' and it is looked up under
    processedDataStorage. When the file is missing, the FileNotFoundError that is raised
    carries the UCSC URL the file can be downloaded from.

    Info: http://hgdownload.cse.ucsc.edu/downloads.html
    """
    from pyliftover import LiftOver
    liftoverfile = 'hg{}ToHg{}.over.chain.gz'.format(frm, to)
    chain_path = processedDataStorage + liftoverfile
    try:
        lifter = LiftOver(chain_path)
    except FileNotFoundError:
        download_hint = 'Source: http://hgdownload.cse.ucsc.edu/gbdb/hg{}/liftOver/{}'.format(
            frm, liftoverfile)
        raise FileNotFoundError(download_hint)
    return lifter
コード例 #30
0
def liftover_cho(df):
    """
    Replace the 'Genomic position' column of df with its hg18 -> hg38 liftover.

    Positions are treated as 1-based: they are shifted to 0-based for pyliftover and shifted
    back afterwards. Rows that cannot be lifted over get the string 'NA'.

    :param df: DataFrame with 'Chromosome' and 'Genomic position' columns (modified in place)
    :return: the same DataFrame
    """
    lo = LiftOver('hg18', 'hg38')

    def lift_coord(row):
        chrom = 'chr' + str(row['Chromosome'])
        pos = row['Genomic position'] - 1
        result = lo.convert_coordinate(chrom, pos)
        # convert_coordinate gives None for a chromosome missing from the chain and an empty
        # list for an unmapped position; len(None) used to raise TypeError here.
        if not result:
            print(f"Didn't find hg38 coordinate for {row['Chromosome']}:{row['Genomic position']}")
            return 'NA'
        return result[0][1] + 1

    df['Genomic position'] = df.apply(lift_coord, axis=1)
    return df
コード例 #31
0
class liftover:
    """
    Lift positions between two genome builds using a UCSC chain.

    Build names are accepted either as a value of map_release or as one of its keys,
    in which case they are translated to the mapped value.
    """

    def __init__(self, build_from, build_to):
        """
        :param build_from: source genome build (a key or a value of map_release)
        :param build_to: destination genome build (a key or a value of map_release)
        :raises Exception: if either build is unknown
        """
        self.build_from = self._resolve_build(build_from, 'SOURCE')
        # The DESTINATION error used to report build_from instead of the offending build_to.
        self.build_to = self._resolve_build(build_to, 'DESTINATION')

        # Download/Source the Chain from UCSC
        if self.build_from != self.build_to:
            self.GetChain()
        else:
            self.chain = None

    @staticmethod
    def _resolve_build(build, role):
        """Return the build name as used in map_release values, or raise for an unknown build."""
        if build in map_release.values():
            return build
        build_mapped = map_release.get(build)
        if build_mapped is None:
            raise Exception(
                'Unknown {} genome build. The value was: {}'.format(role, build))
        return build_mapped

    def GetChain(self):
        '''Downloads the chain from UCSC '''
        self.chain_name = 'UCSC: {} to {}'.format(self.build_from,
                                                  self.build_to)
        self.chain = LiftOver(self.build_from, self.build_to)

    def lift(self, chr, pos):
        """
        Lift one position over to the destination build.

        :param chr: chromosome name without the 'chr' prefix
        :param pos: position on the chromosome (cast to int)
        :return: (chromosome, position, multiple) where multiple is True when more than one
                 target position exists (the first is returned); (None, None, None) when
                 there is no liftover
        """
        if self.chain is None:
            # Source and destination builds are identical, so no chain was loaded and the
            # coordinate is already in the destination build (this used to raise AttributeError).
            return str(chr), int(pos), False

        lifted = self.chain.convert_coordinate(
            'chr{}'.format(str(chr)), int(pos)
        )  # ToDo figure out whether this step should be adjusted for 0/1 indexing?
        if not lifted:
            # None (unknown chromosome) or empty list (no mapping)
            return None, None, None
        # [3:] strips the 'chr' prefix again
        return lifted[0][0][3:], int(lifted[0][1]), len(lifted) > 1
コード例 #32
0
class CravatAnnotator(BaseAnnotator):
    """Annotator that reports the hg19 position of an hg38 variant."""

    def setup(self):
        """Load the hg38 -> hg19 chain file shipped in the 'data' directory next to this module."""
        chain_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'hg38ToHg19.over.chain')
        self.liftover = LiftOver(chain_path)

    def annotate(self, input_data, secondary_data=None):
        """
        Lift the variant position over to hg19.

        :param input_data: mapping with 'chrom' and a 1-based 'pos'
        :param secondary_data: unused
        :return: dict with 'chrom' and 1-based 'pos' in hg19, or an empty dict when there is no liftover
        """
        out = {}
        # pyliftover is 0-based, hence the -1 here and the +1 below
        hg19_data = self.liftover.convert_coordinate(input_data['chrom'], int(input_data['pos']) - 1)
        # The result is None for a chromosome missing from the chain (len(None) used to raise)
        # and an empty list for an unmapped position.
        if hg19_data:
            out['chrom'] = hg19_data[0][0]
            out['pos'] = hg19_data[0][1] + 1
        return out
コード例 #33
0
def main(coords, orig_assembly, new_assembly, chainfile, outfh):
    """
    Lift every 'chrom:pos' string in coords from orig_assembly to new_assembly and print the results.

    Each result row is (chrom, pos) followed by the fields of the first liftover hit. Any failure
    writes the offending coordinate to stderr and re-raises. The chainfile argument is not used.
    """
    # Create a LiftOver object with desired mapping.
    lifter = LiftOver(orig_assembly, new_assembly)

    results = []
    for coord in coords:
        try:
            chrom, pos = coord.split(':')
            # convert_coordinate needs pos as an int, not a str.
            hits = lifter.convert_coordinate(chrom, int(pos))
            first_hit = hits[0]
            results.append((chrom, pos) + first_hit)
        except:
            # The possible failures are not enumerated: a missing liftover surfaces here when
            # indexing the result, and everything is reported and passed on to the caller.
            sys.stderr.write('Offending coord: %s' % coord)
            raise

    print_results(results, outfh)
コード例 #34
0
ファイル: vcf.py プロジェクト: Hensonmw/jcvi
    def __init__(self, chainfile):
        """
        This object performs unique single-position liftovers - it only lifts over chromosome positions that
        map uniquely to the new genome and whose strand hasn't changed.
        Note: You should run a VCF normalization sweep on all lifted-over CPRAs to check for variants that need to be
        re-normalized, and to remove variants where the REF no longer matches after a liftover.
        The combination of these steps will ensure high quality liftovers. However, it should be noted that this won't
        prevent the situation where multiple positions in the old genome pile up uniquely in the new genome, so one
        needs to check for this.
        It's organised as an object rather than a collection of functions so that the LiftOver chainfile
        only gets opened/parsed once and not for every position to be lifted over.
        :param chainfile: A string containing the path to the local UCSC .gzipped chainfile
        :return:
        """

        self.liftover = LiftOver(chainfile)
コード例 #35
0
        interval = intrxn[0].split(":")[1].split("-")
    elif RNAtoplot in intrxn[1] and partner in intrxn[0]:
        interval = intrxn[1].split(":")[1].split("-")
    if len(interval) == 2:
        for i in range(int(interval[0]), int(interval[1])):
            dist[i] += 1
print "RNA size:", len(dist)


# Optional step: lift the per-position counts from mouse coordinates over to human
# coordinates when a chain file is registered for the RNA being plotted.
liftfiles = {
    "mm28S": "/Users/lu/Documents/chang/rrna/liftover/mmtohs28S.liftoverchain",
    "mm45S": "/Users/lu/Documents/chang/rrna/liftover/mmtohs45S.liftoverchain",
    "Malat1": "/Users/lu/Documents/chang/psoralen/examples/MALAT1/mmtohg_Malat1.liftoverchain",
}
if RNAtoplot in liftfiles:
    newdist = [0] * size
    lo = LiftOver(liftfiles[RNAtoplot])
    for i in range(size):
        lifted = lo.convert_coordinate(RNAtoplot, i, '+')
        if not lifted:
            # positions without a liftover contribute nothing
            continue
        # accumulate the count at the first lifted position
        newdist[lifted[0][1]] += dist[i]
    dist = newdist



# Draw the per-position counts as a black bar chart on a wide, short figure.
figure = plt.figure(figsize=(8,2))
# Axes rectangle is [left, bottom, width, height] in figure fractions.
axes = plt.Axes(figure, [.3,.3,.6,.6])
figure.add_axes(axes)
plt.bar(range(0, size), dist, color='k')
# Hide the top and right frame lines and keep tick marks on the left and bottom only.
axes.spines['top'].set_visible(False)
axes.spines['right'].set_visible(False)
axes.yaxis.set_ticks_position('left')
axes.xaxis.set_ticks_position('bottom')
コード例 #36
0
__author__ = 'rajaram'

#Reference : https://pypi.python.org/pypi/pyliftover
#LiftOver chain files : http://hgdownload.cse.ucsc.edu/gbdb/hg38/liftOver/

from pyliftover import LiftOver
#lo = LiftOver('hg38', 'hg19')
lo = LiftOver('hg38ToHg19.over.chain.gz')
# Lift 100 consecutive chr1 positions over and show each raw result and the lifted chromosome.
for x in range(0, 100):
    data = lo.convert_coordinate('chr1', 1000000+x)
    print(data)
    # convert_coordinate gives None for an unknown chromosome and an empty list for an
    # unmapped position; data.pop() used to crash on either.
    if not data:
        continue
    data2 = data.pop()
    print(data2[0])
コード例 #37
0
    def addTSSInfo(self, vcfInputFile):
        """
        Add a TSSOL INFO field to the records of a VCF file and write them to 'output.vcf'.

        The start and end of every variant are lifted over with the 'hg38ToHg19.over.chain.gz'
        chain, the lifted interval is queried against the FANTOM5 SPARQL endpoint and the
        TSSOL value is derived from the first returned row. Records whose start or end cannot
        be lifted over are written without a TSSOL value. Running totals are printed after
        every record.

        :param vcfInputFile: path of the VCF file to annotate
        """
        # Context managers close both files (flushing the output) even when an error occurs;
        # the handles used to be left open.
        with open(vcfInputFile, 'r') as vcfIn:
            vcf_reader = vcf.Reader(vcfIn)
            vcf_reader.infos['TSSOL'] = VcfInfo('TSSOL', vcf_field_counts['A'], 'String',
                                                'Info indicates whether the variant overlapping with the'
                                                ' transcription start site(TSS)')

            with open('output.vcf', 'w') as vcfOut:
                vcf_writer = vcf.VCFWriter(vcfOut, vcf_reader)

                query = SPARQLQueries.sparqlQueries()

                totalVar = 0
                tssOLVar = 0

                lo = LiftOver('hg38ToHg19.over.chain.gz')

                for record in vcf_reader:
                    variantStart = record.start
                    variantEnd = record.end
                    variantChromosome = record.CHROM
                    variantSubType = record.var_subtype
                    isOverlapping = False

                    # Adding chr prefix to the chromosome
                    if "chr" not in variantChromosome:
                        variantChromosome = "chr" + str(record.CHROM)

                    # liftover from hg38 to hg19
                    startLifted = lo.convert_coordinate(variantChromosome, variantStart)

                    print(variantStart)
                    print(variantEnd)

                    # convert_coordinate gives None for an unknown chromosome and an empty list
                    # for an unmapped position. Both ends must map before the query is run;
                    # previously an empty list or an unmapped end crashed in pop().
                    endLifted = None
                    if startLifted:
                        endLifted = lo.convert_coordinate(variantChromosome, variantEnd)

                    if startLifted and endLifted:
                        # As before, the last reported mapping is the one used.
                        variantChromosomehg19 = startLifted[-1][0]
                        variantStarthg19 = startLifted[-1][1]
                        variantEndhg19 = endLifted[-1][1]

                        # SPARQL query
                        result = query.getTSS('http://ep.dbcls.jp/fantom5/sparql', variantStarthg19,
                                              variantEndhg19, variantChromosomehg19)

                        for row in result:
                            values = sparql.unpack_row(row)
                            cageStart = values[1]

                            if variantSubType == 'ins' and variantStart > cageStart:
                                isOverlapping = True
                                tssOLVar = tssOLVar + 1
                            elif variantSubType != 'ins' and cageStart > 0:
                                isOverlapping = True
                                tssOLVar = tssOLVar + 1
                            # NOTE(review): only the first row is examined, exactly as in the
                            # previous version (its break sat at loop level) - confirm intended.
                            break

                        totalVar = totalVar + 1
                        record.add_info('TSSOL', [isOverlapping])
                    else:
                        # str() keeps this from raising when the record has no ID
                        print("No liftover found for this pos = " + str(record.ID))

                    vcf_writer.write_record(record)

                    print("No of variants = " + str(totalVar))
                    print("No of tss overlapping variants = " + str(tssOLVar))
コード例 #38
0
ファイル: seq.py プロジェクト: Tsinghua-gongjing/test
 def __init__(self, from_db, to_db):
     """Initialise the LiftOver part of this object with the given source and target database names."""
     LiftOver.__init__(self, from_db=from_db, to_db=to_db)
コード例 #39
0
class SubmitHiCLiftOver:
    """
    Lift ENCODE Hi-C TAD boundary files from hg19 to hg38 and submit the lifted files.

    runLiftover() downloads every TAD file of the released Hi-C experiments, lifts each
    line over and writes the result plus three bookkeeping TSV files into OUT_DIR;
    runSubmit() submits the lifted files.
    """

    # Directory receiving the lifted files and the bookkeeping TSVs
    OUT_DIR = "/home/mjp/tadsLiftOverHg19ToHg38"
    # ENCODE search listing the experiments whose TAD files are processed
    SEARCH_URL = "https://www.encodeproject.org/search/?type=Experiment&assay_title=Hi-C&status=released"
    # An interval whose length changes by more than this many bases in the liftover is dropped
    MAX_LENGTH_CHANGE = 5000

    def __init__(self, args):
        """:param args: options object handed on to authenticateEncodeTxt and submitFile"""
        self.args = args
        self.doLiftOver = LiftOver('hg19', 'hg38')

        self.lengths_orig = []      # [oldLen, newLen] of every left interval that lifted over
        self.lengths_filtered = []  # [oldLen, newLen] of the left interval of every kept line
        self.oldVsNew = []          # [original line, lifted line] of every kept line

    def splitStrCoordStr(self, raw):
        """Turn 'chrom:start-end' into the tab separated string 'chrom<TAB>start<TAB>end'."""
        fields = raw.split(':')
        span = fields[1].split('-')
        return "\t".join([fields[0], span[0], span[1]])

    def splitStrCoord(self, raw):
        """Turn 'chrom:start-end' into [chrom, int(start), int(end)]."""
        fields = raw.split(':')
        span = fields[1].split('-')
        return [fields[0], int(span[0]), int(span[1])]

    def wrapLiftover(self, debug, chrom, start, end, errMsg):
        """
        Lift an interval over by lifting its start and end independently.

        :param debug: print the reason whenever the interval is rejected
        :param errMsg: prefix for the debug messages
        :return: [chrom, start, end, oldLen, newLen, abs(newLen - oldLen)] in the new build, or
                 None when an end does not lift over, the ends land on different chromosomes,
                 or the old or new length is below 1
        """
        lift_start = self.doLiftOver.convert_coordinate(chrom, start)
        if not lift_start:
            if debug:
                print(errMsg + " start", chrom, start)
            return None
        lift_start = lift_start[0]

        lift_end = self.doLiftOver.convert_coordinate(chrom, end)
        if not lift_end:
            if debug:
                print(errMsg + " end", chrom, end)
            return None
        lift_end = lift_end[0]

        if lift_start[0] != lift_end[0]:
            if debug:
                print(errMsg + " no longer same chrom", chrom, start, end, lift_start[0], lift_end[0])
            return None

        oldLen = end - start
        chromLift = lift_start[0]
        startLift = lift_start[1]
        endLift = lift_end[1]
        newLen = endLift - startLift

        if oldLen < 1:
            if debug:
                print(errMsg + " oldLen: negative!", chrom, start, end)
            return None
        if newLen < 1:
            if debug:
                print(errMsg + " newLen: negative!", chromLift, startLift, endLift)
            return None

        return [chromLift, startLift, endLift, oldLen, newLen, abs(newLen - oldLen)]

    def coordToStr(self, c):
        """Format the first three items of c as 'chrom:start-end'."""
        return c[0] + ':' + str(c[1]) + '-' + str(c[2])

    def parseLine(self, line):
        """
        Lift one line of a TAD file over to hg38.

        :param line: whitespace separated line [0chrom, 1start, 2end, 3mess, 4value], e.g.
            chr10 3240001 4120000 boundary.3|hg19|chr10:3240001-3280000___boundary.4|hg19|chr10:4080001-4120000 1.06090369391
        :return: the lifted, tab separated line (without newline), or None when any interval
                 fails to lift over or changes length by more than MAX_LENGTH_CHANGE
        """
        toks = line.split()
        leftCoord = toks[:3]
        leftCoord[1] = int(leftCoord[1])
        leftCoord[2] = int(leftCoord[2])
        mtoks = toks[3].split('|')
        midBoundaryLeft = mtoks[0]
        # Anything but exactly three '|' fields is treated as carrying a second (right) boundary
        hasRight = 3 != len(mtoks)

        midParts = mtoks[2].split('__', 1)
        if hasRight:
            # The boundaries are joined by '___'. Splitting on '__' used to leave the third
            # underscore on the name, which then came out as '____' in the lifted line.
            midBoundaryRight = midParts[1].lstrip('_')
            rightCoord = self.splitStrCoord(mtoks[-1])
        midCoord = self.splitStrCoord(midParts[0])

        leftCoordLift = self.wrapLiftover(False, leftCoord[0], leftCoord[1], leftCoord[2], "left")
        if not leftCoordLift:
            return None
        self.lengths_orig.append([leftCoordLift[3], leftCoordLift[4]])
        if leftCoordLift[5] > self.MAX_LENGTH_CHANGE:
            return None

        midCoordLift = self.wrapLiftover(False, midCoord[0], midCoord[1], midCoord[2], "mid")
        if not midCoordLift:
            return None
        if midCoordLift[5] > self.MAX_LENGTH_CHANGE:
            return None

        if hasRight:
            rightCoordLift = self.wrapLiftover(False, rightCoord[0], rightCoord[1], rightCoord[2], "right")
            if not rightCoordLift:
                return None
            if rightCoordLift[5] > self.MAX_LENGTH_CHANGE:
                return None

        self.lengths_filtered.append([leftCoordLift[3], leftCoordLift[4]])

        if hasRight:
            mid = [midBoundaryLeft, "hg38-liftOver", self.coordToStr(midCoordLift) + '___' + midBoundaryRight,
                   "hg38-liftOver", self.coordToStr(rightCoordLift)]
        else:
            mid = [midBoundaryLeft, "hg38-liftOver", self.coordToStr(midCoordLift)]

        ret = "\t".join([str(x) for x in leftCoordLift[:3] + ['|'.join(mid)] + [toks[4]]])
        self.oldVsNew.append([line, ret])
        return ret

    def tmpFile(self, accession, assembly, prefix):
        """Return the path of the lifted file for the given accession inside OUT_DIR."""
        return os.path.join(self.OUT_DIR,
                            assembly + "_liftOver_" + prefix + '_' + accession + ".bed.gz")

    def parseOutFile(self, accession, fnp):
        """Lift every line of the gzipped file fnp over and write the kept lines to the hg38 file."""
        good = 0
        bad = 0
        # NOTE(review): the files are opened in gzip's default/binary modes while parseLine
        # works with str - this looks written for Python 2; confirm before running on Python 3.
        with gzip.open(fnp) as f:
            with gzip.open(self.tmpFile(accession, 'hg38', 'point'), 'wb') as outF:
                for line in f:
                    newLine = self.parseLine(line)
                    if newLine:
                        outF.write(newLine + '\n')
                        good += 1
                    else:
                        bad += 1
        print("lifted:", accession, good, bad)

    def _downloadedTads(self):
        """Yield (experiment, file) for every TAD file of the SEARCH_URL experiments, downloading each first."""
        mc = MemCacheWrapper()
        qd = QueryDCC(cache=mc)
        for exp in qd.getExps(self.SEARCH_URL):
            for f in exp.getTADs():
                f.download()
                yield exp, f

    def _writeTsv(self, name, rows):
        """Write rows as tab separated lines to the file called name inside OUT_DIR."""
        fnp = os.path.join(self.OUT_DIR, name)
        with open(fnp, 'w') as f:
            for r in rows:
                f.write('\t'.join([str(x) for x in r]) + '\n')
        print("wrote", fnp)

    def runLiftover(self):
        """Lift all TAD files over and write the length and old-vs-new bookkeeping files."""
        for _, f in self._downloadedTads():
            self.parseOutFile(f.fileID, f.fnp())

        self._writeTsv("lengths_orig.tsv", self.lengths_orig)
        self._writeTsv("lengths_filtered.tsv", self.lengths_filtered)

        fnp = os.path.join(self.OUT_DIR, "oldVsNew.tsv")
        with open(fnp, 'w') as f:
            for r in self.oldVsNew:
                # r[0] is written as read (no newline is added after it); r[1] gets a newline
                f.write(r[0])
                f.write(r[1] + '\n')
        print("wrote", fnp)

    def fileJson(self, exp, f, fnp):
        """Build the submission metadata for the lifted file fnp derived from file f of experiment exp."""
        return {
            "dataset": exp.encodeID,
            "file_format": "bed",
            "file_format_type": "bed3+",
            "file_size": os.path.getsize(fnp),
            "md5sum": Utils.md5(fnp),
            "output_type": f.output_type,
            "assembly": "GRCh38",
            "award": "/awards/U41HG007000/",
            "lab": "/labs/zhiping-weng/",
            "derived_from": [f.fileID],
            "submitted_file_name": fnp,
            "aliases": ["zhiping-weng:hic-tad-hg38-liftOver-" + f.fileID]
        }

    def submitFile(self, exp, f):
        """Print and submit the metadata of the lifted file that belongs to f."""
        fileAccession = f.fileID
        fnp = self.tmpFile(fileAccession, 'hg38', 'point')
        j = self.fileJson(exp, f, fnp)
        print(j)
        # module-level submitFile(), not this method
        submitFile(self.args, j)

    def runSubmit(self):
        """Authenticate and submit the lifted file of every TAD file."""
        authenticateEncodeTxt(self.args)

        for exp, f in self._downloadedTads():
            self.submitFile(exp, f)
コード例 #40
0
   There are some missing arguments modified.
   Usage: compare_mafs MAF_FILE_GDC MAF_FILE_TCGA
        MAF_FILE_1: Path for GDC maf file   
        MAF_FILE_2: Path for TCGA maf file 
   """
   sys.exit()
else:
   gdc_maf_project = sys.argv[1]
   tcga_maf_file  = sys.argv[2]

# Collect the GDC MAF files of the requested project
gdc_maf_files = glob.glob('../kossproject/*_maf_files_tcga/TCGA.' + gdc_maf_project + '*.maf')
nfiles_gdc = len(gdc_maf_files)

# Load the hg19 -> hg38 liftover and locate the GRCh38 reference FASTA
lo = LiftOver('hg19', 'hg38')
fastaRef = pybedtools.example_filename('/mnt/GDCpaper/Homo_sapiens.GRCh38.dna.primary_assembly.fa')

# Counters for the comparison (presumably true/false positives, total variants,
# variants without a liftover and reference mismatches - confirm against the loop below)
pair_list = {}
TP=0
FP=0
total=0
noncross=0
diffref=0

# Each GDC file is read separately; one slot per file is reserved up front
gdc_var_files_list = [None] * nfiles_gdc
gdc_pairs = []
# NOTE(review): 'file' shadows the Python 2 builtin of the same name
file = 0
for maf_file in gdc_maf_files:
コード例 #41
0
ファイル: convert-cgi-to-vcf.py プロジェクト: jdblischak/dox
fam_handle = open(fam_fname)

# Dox individuals
dox_fname = args[1]
#dox_fname = "../data/samples.txt"
# NOTE(review): the message is shown when the file does NOT exist, so its wording reads inverted
assert os.path.exists(dox_fname), "Input samples file exists."
dox_handle = open(dox_fname)

# The contig name is the second dot-separated field of the input file's base name
contig = os.path.basename(in_fname).split(".")[1]

# Constant field values (presumably the VCF QUAL, FILTER and FORMAT columns - confirm against
# the writer further down); note that 'filter' and 'format' shadow builtins
qual = "."
filter = "."
format = "GT"

# liftOver chain
lo = LiftOver("hg19", "hg38")

# Today's date as YYYYMMDD
today = str(datetime.date.today()).replace("-", "")

# Logging number of SNPs
log_variants = 0   # Number of variants
log_snps = 0       # Number of SNPs
log_success = 0    # Number of SNPs successfully converted to hg38
log_nonconvert = 0 # Number of SNPs with no coordinates in hg38
log_multi = 0      # Number of SNPs with multiple coordinates in hg38
log_diff_chr = 0  # Number of SNPs with hg38 coordinates on diff chromosome

# Parse individuals -----------------------------------------------------------

# All individuals in Plink .fam file
コード例 #42
0
ファイル: lift_over.py プロジェクト: orenlivne/ober
Usage: lift_over.py <from-build> <to-build>

stdin line format: chrom bp_in_from_build
stdout line format: bp_in_to_build, or '-' if not found

Created on February 19, 2014
@author: Oren Livne <*****@*****.**>
============================================================
'''
import sys, traceback, util
from pyliftover import LiftOver

if __name__ == '__main__':
    # Reads 'chrom bp' lines from stdin and prints the lifted position of each line.
    # Indentation is spaces only (the tab-indented lines were a TabError on Python 3) and the
    # single-argument print() calls behave the same on Python 2 and 3.
    try:
        src, target = sys.argv[1:3]
        if src == target:
            # Same build on both sides: echo the position without loading a chain.
            # NOTE(review): this prints the position twice per line, unlike the branch below.
            for _, bp in (line.strip().split(' ') for line in sys.stdin):
                print('%d %d' % (int(bp), int(bp)))
        else:
            lo = LiftOver(src, target)
            for chrom, bp in (line.strip().split(' ') for line in sys.stdin):
                out = lo.convert_coordinate('chr' + chrom, int(bp))
                if not out:
                    # No liftover (unknown chromosome or unmapped position)
                    print('-')
                else:
                    print('%d' % (out[0][1],))
    except Exception:
        # Report the failure on stdout and exit with the failure code. Exception rather than a
        # bare except so that Ctrl-C and sys.exit are not intercepted.
        traceback.print_exc(file=sys.stdout)
        sys.exit(util.EXIT_FAILURE)
コード例 #43
0
ファイル: hg18_to_hg19.py プロジェクト: kmoad/CRAVAT-testing
from pyliftover import LiftOver
import os
import xml.etree.ElementTree as ET
import shutil

hg19_test = 'seqont_c'
top_dir = 'C:\\Users\\Kyle\\cravat\\CRAVAT-testing\\test_cases\\seqont'
lo = LiftOver('hg19','hg18')
hg18_test = '_'.join(hg19_test.split('_')[:-1]) + '_18'

hg19_dir = os.path.join(top_dir, hg19_test)
hg18_dir = os.path.join(top_dir, hg18_test)

# Make new directory. Move files to new dir with updated names . All changes will be done here
print 'Making folder and files'
if os.path.exists(hg18_dir):
    cont = raw_input('hg18 dir exists, continue? <y/n>: ')
    if cont == 'y':
        shutil.rmtree(hg18_dir)
        os.makedirs(hg18_dir)
    else:
        exit()
else:
    os.makedirs(hg18_dir)
    
shutil.copy(os.path.join(hg19_dir,'%s_desc.xml' %hg19_test), os.path.join(hg18_dir,'%s_desc.xml' %hg18_test))
shutil.copy(os.path.join(hg19_dir,'%s_input.txt' %hg19_test), os.path.join(hg18_dir,'%s_input.txt' %hg18_test)) 
shutil.copy(os.path.join(hg19_dir,'%s_key.csv' %hg19_test), os.path.join(hg18_dir,'%s_key.csv' %hg18_test))    

# Add a <hg18>on</hg18> tag to the desc.xml
print 'Changing desc file'
コード例 #44
0
ファイル: lift_db.py プロジェクト: hunt-genes/gwasc
from pymongo import MongoClient
from pyliftover import LiftOver

mongo_client = MongoClient()
db = mongo_client.fasttrack

lo = LiftOver("hg38ToHg19.over.chain.gz")

# Add hg19 coordinates (hg19chr / hg19pos) to every gwas document that has an hg38 position.
unmatched = 0
matched = 0
for r in db.gwas.find():
    chrid = r["chr_id"]
    chrpos = r["chr_pos"]
    if not (chrid and chrpos):
        continue
    try:
        _chrpos = int(chrpos)
    except (TypeError, ValueError):
        # Positions that are not a single integer are skipped
        continue

    # pyliftover is 0-based: shift the stored position down for the lookup ...
    lifted = lo.convert_coordinate("chr%s" % chrid, _chrpos - 1)
    if lifted:
        new_chrid = lifted[0][0].split("chr")[1]
        # ... and shift the result back up, so hg19pos uses the same convention as chr_pos
        # (the lifted value used to be stored one base too low).
        new_chrpos = lifted[0][1] + 1
        matched += 1
        db.gwas.update_many(
            {"chr_id": chrid, "chr_pos": chrpos}, {"$set": {"hg19chr": new_chrid, "hg19pos": new_chrpos}}
        )
    else:
        # print('NONE: %s %s' %(chrid, chrpos))
        unmatched += 1
コード例 #45
0
ファイル: manhattan_and_qq.py プロジェクト: team149/tc9
def plot_manhattan(
    args, annotations, l_x, l_y, l_c, x_ticks, y_max, d_pos_init_chrom):
    """
    Draw the Manhattan plot and save it as '<args.out>.manhattan.png'.

    When args.EFO names a tab separated GWAS catalog file, every catalog hit is lifted over
    from hg38 to hg19 and drawn as a dashed vertical line: red for the most frequent trait
    in the file, orange for all other traits.

    :param args: options; reads EFO, min_y, threshold_p, title and out
    :param annotations: dicts with prob, pos, af, rsID, gene_names, x and y; those with
        prob <= args.threshold_p are printed to stdout and annotated on the plot
    :param l_x: x values of the scatter points
    :param l_y: y values of the scatter points
    :param l_c: colours of the scatter points
    :param x_ticks: (position, label) pairs for the x axis
    :param y_max: largest y value; the axis limit is max(int(y_max + 3), args.min_y)
    :param d_pos_init_chrom: x offset of each chromosome, keyed by chromosome name
    """

    y_max = max(int(y_max + 3), args.min_y)

    if args.EFO:
        ## Just make some assumptions about builds here for now.
        ## https://en.wikipedia.org/wiki/Reference_genome
        lo = LiftOver('hg38', 'hg19')

        ## First pass: find the most frequently occurring trait (column 8).
        with open(args.EFO) as f:
            cnt = collections.Counter(line.split('\t')[7] for line in f)
        trait_most_common = cnt.most_common(1)[0][0]

        ## Second pass: draw one line per catalog hit.
        with open(args.EFO) as f:
            ## Skip header.
            next(f, None)
            for line in f:
                l = line.split('\t')
                CHR_ID = l[11]
                ## Skip if missing data.
                if CHR_ID == '':
                    continue
                try:
                    CHR_POS = int(l[12])
                ## Continue if CHR_POS is not an integer.
                except ValueError:
                    continue
                y = min(y_max, float(l[28]))
                try:
                    x_offset = d_pos_init_chrom[CHR_ID]
                except KeyError:
                    assert CHR_ID == 'X'
                    continue
                ## The result is None for a chromosome missing from the chain (this used to
                ## raise an uncaught TypeError) and an empty list for an unmapped position.
                lifted = lo.convert_coordinate('chr{}'.format(CHR_ID), CHR_POS)
                if not lifted:
                    print('IndexError', CHR_ID, CHR_POS, lifted, file=sys.stderr)
                    continue
                x = x_offset + lifted[0][1]
                ## Colour most frequently occuring trait red.
                ## Colour less frequently occuring traits orange,
                ## because these might be junk in the GWAS catalog.
                colour = '#FF0000' if l[7] == trait_most_common else '#FF8000'
                plt.vlines(x, 0, y, colors=colour, linewidth=0.5, linestyle='--')

    plt.ylabel(r'-log$_{10}$($p$)')

    plt.axhline(-math.log10(args.threshold_p), color='0.2', linewidth=0.5, linestyle='--')
    try:
        plt.ylim((0, y_max))  # todo: make argument
    except Exception:
        ## Best effort: keep the automatic limits if these cannot be applied.
        pass

    print('plt.scatter(manhattan)', file=sys.stderr)
    plt.scatter(l_x, l_y, c=l_c, s=3)

    plt.title(args.title, fontsize='small')

    for annotation in annotations:
        if annotation['prob'] > args.threshold_p:
            continue
        print('\t'.join(
            [str(annotation[k]) for k in sorted(annotation.keys())]))
        plt.annotate(
            '\n'.join((
                'p={:.1E}'.format(annotation['prob']),
                'pos={:,}'.format(annotation['pos']),
                'MAF={:.3f}'.format(min(annotation['af'], 1 - annotation['af'])),
                annotation['rsID'],
                ','.join(annotation['gene_names']),
                )),
            xy=(annotation['x'], annotation['y']),
            fontsize='xx-small',
            horizontalalignment='center',
            verticalalignment='bottom',
            rotation=30,
            )

    ## 'size' and 'fontsize' are aliases of one text property; only one is passed because
    ## newer matplotlib releases reject receiving both.
    plt.xticks(
        *zip(*x_ticks),
        rotation=-75, fontsize=6)

    print('plt.savefig( {}.manhattan.png )'.format(args.out), file=sys.stderr)
    plt.savefig('{}.manhattan.png'.format(args.out), dpi=600)