Ejemplo n.º 1
0
            ]
            d.pop(0)  # remove the header
            data = set(d)

            print "%d known genes found (using gene symbols)" % (len(data), )

            # read the gene symbols
            file_symbols = os.path.join(options.output_directory,
                                        'synonyms.txt')
            loci = symbols.generate_loci(file_symbols)

            genes = symbols.read_genes_symbols(file_symbols)

            d = []
            for g in data:
                ens = symbols.ensembl(g.upper(), genes, loci)
                if ens:
                    d.extend(ens)

            data = [line + '\n' for line in d]
            data = sorted(set(data))

            print "%d known genes found (after conversion to Ensembl ids)" % (
                len(data), )

        # Write the collected lines to oncogenes_more.txt.  The handle is
        # closed explicitly (even if the write fails) so the data is flushed
        # to disk deterministically instead of whenever the anonymous file
        # object happens to be garbage-collected.
        oncogenes_handle = open(
            os.path.join(options.output_directory, 'oncogenes_more.txt'), 'w')
        try:
            oncogenes_handle.writelines(data)
        finally:
            oncogenes_handle.close()

        # Remove the temporary file if it is still present.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
Ejemplo n.º 2
0
            ]
            # Append the version line(s) in `txt` to version.txt.  The handle
            # is closed explicitly (even on error) so the append is flushed
            # right away rather than when the anonymous file object is
            # eventually garbage-collected.
            version_handle = open(
                os.path.join(options.output_directory, 'version.txt'), 'a')
            try:
                version_handle.writelines(txt)
            finally:
                version_handle.close()

            #
            # read the gene symbols
            file_symbols = os.path.join(options.output_directory,
                                        'synonyms.txt')
            loci = symbols.generate_loci(file_symbols)

            genes = symbols.read_genes_symbols(file_symbols)

            d = []
            for (g1, g2) in data:
                if g1.upper() != g2.upper():
                    ens1 = symbols.ensembl(g1.upper(), genes, loci)
                    ens2 = symbols.ensembl(g2.upper(), genes, loci)
                    if ens1 and ens2:
                        for e1 in ens1:
                            for e2 in ens2:
                                if e1 != e2:
                                    d.append([e1, e2])

            data = ['\t'.join(sorted(line)) + '\n' for line in d]
            data = sorted(set(data))

            print "%d known fusion genes found" % (len(data), )

            if not options.skip_filter_overlap:
                ens2hugo = dict([
                    tuple(line.rstrip('\r\n').split('\t')) for line in file(
Ejemplo n.º 3
0
                # read the gene symbols
                file_symbols = os.path.join(options.output_directory,
                                            'synonyms.txt')
                genes = symbols.read_genes_symbols(file_symbols)

                # Build `banned`: a set of (id, id) tuples, each stored with
                # the smaller id first.  Every value of `loci` is treated as
                # a sequence of symbols (presumably the symbols sharing one
                # locus -- confirm in symbols.generate_loci); all pairs of
                # different symbols within one value contribute the
                # combinations of their ids.
                banned = set()
                loci = symbols.generate_loci(file_symbols)
                for v in loci.values():
                    # only values listing at least two symbols can form a pair
                    if not v or len(v) < 2:
                        continue
                    n = len(v)
                    for i in xrange(n - 1):
                        for j in xrange(i + 1, n):
                            if v[i].upper() == v[j].upper():
                                continue
                            ens1 = symbols.ensembl(v[i].upper(), genes, loci)
                            ens2 = symbols.ensembl(v[j].upper(), genes, loci)
                            if not (ens1 and ens2):
                                continue
                            for e1 in ens1:
                                for e2 in ens2:
                                    if e1 != e2:
                                        # Order the pair WITHOUT rebinding e1:
                                        # swapping e1/e2 in place would replace
                                        # the outer loop variable, so the
                                        # remaining e2 values would be paired
                                        # against an id taken from ens2
                                        # instead of the current ens1 id.
                                        if e2 < e1:
                                            banned.add((e2, e1))
                                        else:
                                            banned.add((e1, e2))

                d = []
                for (g1, g2) in fusions:
                    if (g1.upper() == g2.upper()
Ejemplo n.º 4
0
            # Record the database version (stamped with the date held in
            # `today`) by appending one line to version.txt.  The handle is
            # closed explicitly (even on error) so the append is flushed
            # right away rather than when the anonymous file object is
            # eventually garbage-collected.
            txt = ['Non-cancer tissues and cells (Babiceanu et al. Nucl. Acids Res. 2016) database version: %s\n' % (today.strftime("%Y-%m-%d"),)]
            version_handle = open(os.path.join(options.output_directory,'version.txt'),'a')
            try:
                version_handle.writelines(txt)
            finally:
                version_handle.close()

    #
            # read the gene symbols
            file_symbols = os.path.join(options.output_directory,'synonyms.txt')
            loci = symbols.generate_loci(file_symbols)

            genes = symbols.read_genes_symbols(file_symbols)

            d = []
            for (g1,g2) in data:
                if g1.upper() != g2.upper():
                    ens1 = symbols.ensembl(g1.upper(),genes,loci)
                    ens2 = symbols.ensembl(g2.upper(),genes,loci)
                    if ens1 and ens2:
                        for e1 in ens1:
                            for e2 in ens2:
                                if e1 != e2:
                                    d.append([e1,e2])

            data = ['\t'.join(sorted(line)) + '\n' for line in d]
            data = sorted(set(data))

            print "%d known fusion genes found" % (len(data),)

            if not options.skip_filter_overlap:
                ens2hugo = dict([tuple(line.rstrip('\r\n').split('\t')) for line in file(os.path.join(options.output_directory,'genes_symbols.txt'),'r').readlines() if line.rstrip('\r\n')])
Ejemplo n.º 5
0
            # read the gene symbols
            file_symbols = os.path.join(options.output_directory,'synonyms.txt')
            genes = symbols.read_genes_symbols(file_symbols)

            # Build `banned`: a set of (id, id) tuples, each stored with the
            # smaller id first.  Every value of `loci` is treated as a
            # sequence of symbols (presumably the symbols sharing one locus
            # -- confirm in symbols.generate_loci); all pairs of different
            # symbols within one value contribute the combinations of their
            # ids.
            banned = set()
            loci = symbols.generate_loci(file_symbols)
            for v in loci.values():
                # only values listing at least two symbols can form a pair
                if not v or len(v) < 2:
                    continue
                n = len(v)
                for i in xrange(n-1):
                    for j in xrange(i+1,n):
                        if v[i].upper() == v[j].upper():
                            continue
                        ens1 = symbols.ensembl(v[i].upper(),genes,loci)
                        ens2 = symbols.ensembl(v[j].upper(),genes,loci)
                        if not (ens1 and ens2):
                            continue
                        for e1 in ens1:
                            for e2 in ens2:
                                if e1 != e2:
                                    # Order the pair WITHOUT rebinding e1:
                                    # swapping e1/e2 in place would replace
                                    # the outer loop variable, so the
                                    # remaining e2 values would be paired
                                    # against an id taken from ens2 instead
                                    # of the current ens1 id.
                                    if e2 < e1:
                                        banned.add((e2,e1))
                                    else:
                                        banned.add((e1,e2))


            d = []
            for (g1,g2) in fusions:
                if ( g1.upper() == g2.upper() or ((g1.endswith('@') and g2.endswith('@')) and g1.upper()[:2] == g2.upper()[:2])):
                    print "%s-%s skipped!" % (g1,g2)
                    continue
                ens1 = symbols.ensembl(g1,genes,loci)
Ejemplo n.º 6
0
]



    data = enlarge.get(options.organism.lower(),[])
    if data:

        #file_symbols = os.path.join(options.output_directory,'genes_symbols.txt')
        file_symbols = os.path.join(options.output_directory,'synonyms.txt')
        loci = symbols.generate_loci(file_symbols)

        genes = symbols.read_genes_symbols(file_symbols)

        d = []
        for g in data:
            ens = symbols.ensembl(g.upper(),genes,loci)
            if ens:
                for e in ens:
                    d.append(e)
            else:
                print "   - Original:",g

        data = [line + '\n' for line in d]
        data = sorted(set(data))

        print "%d genes to be enlarged and covered" % (len(data),)


    # Write the collected lines to enlarge.txt (an empty file when `data` is
    # empty).  The handle is closed explicitly (even if the write fails) so
    # the data is flushed deterministically instead of whenever the
    # anonymous file object happens to be garbage-collected.
    enlarge_handle = open(os.path.join(options.output_directory,'enlarge.txt'),'w')
    try:
        enlarge_handle.writelines(data)
    finally:
        enlarge_handle.close()
    #
Ejemplo n.º 7
0
    # Output lines; stays empty when `mygenes` is empty.
    data = []
    if mygenes:
        # Two lookup sources living next to the output file: the first is
        # tried first and the synonyms file acts as the fallback.
        symbols_dir = os.path.dirname(options.output)
        file_symbols1 = os.path.join(symbols_dir,'genes_symbols.txt')
        file_symbols2 = os.path.join(symbols_dir,'synonyms.txt')

        loci1 = symbols.generate_loci(file_symbols1)
        loci2 = symbols.generate_loci(file_symbols2)

        genes1 = symbols.read_genes_symbols(file_symbols1)
        genes2 = symbols.read_genes_symbols(file_symbols2)

        d = []
        for (g1,g2) in mygenes:
            # both partners must be non-empty
            if not (g1 and g2):
                continue
            sym1 = g1.upper()
            sym2 = g2.upper()
            # a gene paired with itself is not considered
            if sym1 == sym2:
                continue

            ens1 = symbols.ensembl(sym1,genes1,loci1)
            ens2 = symbols.ensembl(sym2,genes1,loci1)
            # fall back to the synonyms tables for whichever partner failed
            if not ens1:
                ens1 = symbols.ensembl(sym1,genes2,loci2)
            if not ens2:
                ens2 = symbols.ensembl(sym2,genes2,loci2)

            if ens1 and ens2:
                # every combination of distinct, non-empty ids
                d.extend([[e1,e2] for e1 in ens1 for e2 in ens2
                          if e1 and e2 and e1 != e2])

        # Each pair becomes one tab-separated line with its two ids in
        # sorted order; duplicates are removed and the lines sorted.
        data = sorted(set(['\t'.join(sorted(pair)) + '\n' for pair in d]))
Ejemplo n.º 8
0
        # Two lookup sources living next to the output file: the first is
        # tried first and the synonyms file acts as the fallback.
        symbols_dir = os.path.dirname(options.output)
        file_symbols1 = os.path.join(symbols_dir, 'genes_symbols.txt')
        file_symbols2 = os.path.join(symbols_dir, 'synonyms.txt')

        loci1 = symbols.generate_loci(file_symbols1)
        loci2 = symbols.generate_loci(file_symbols2)

        genes1 = symbols.read_genes_symbols(file_symbols1)
        genes2 = symbols.read_genes_symbols(file_symbols2)

        d = []
        for (g1, g2) in mygenes:
            # both partners must be non-empty
            if not (g1 and g2):
                continue
            sym1 = g1.upper()
            sym2 = g2.upper()
            # a gene paired with itself is not considered
            if sym1 == sym2:
                continue

            ens1 = symbols.ensembl(sym1, genes1, loci1)
            ens2 = symbols.ensembl(sym2, genes1, loci1)
            # fall back to the synonyms tables for whichever partner failed
            if not ens1:
                ens1 = symbols.ensembl(sym1, genes2, loci2)
            if not ens2:
                ens2 = symbols.ensembl(sym2, genes2, loci2)

            if ens1 and ens2:
                # every combination of distinct, non-empty ids
                d.extend([[e1, e2] for e1 in ens1 for e2 in ens2
                          if e1 and e2 and e1 != e2])

        # Each pair becomes one tab-separated line with its two ids in
        # sorted order; duplicates are removed and the lines sorted.
        data = sorted(set(['\t'.join(sorted(pair)) + '\n' for pair in d]))