#
    #

    # Load the gene pairs listed in the input file.
    print "Reading the input file...", options.input
    # One list of tab-separated fields per line; line endings are stripped
    # and lines that are empty after stripping are dropped.
    mygenes = [line.rstrip('\r\n').split('\t') for line in file(options.input,'r').readlines() if line.rstrip('\r\n')]

    data = []
    if mygenes:

        # Both symbol tables are looked up in the directory of the output file.
        file_symbols1 = os.path.join(os.path.dirname(options.output),'genes_symbols.txt')
        file_symbols2 = os.path.join(os.path.dirname(options.output),'synonyms.txt')

        # NOTE(review): presumably lookup tables consumed by symbols.ensembl()
        # below -- confirm their exact shape in the `symbols` module.
        loci1 = symbols.generate_loci(file_symbols1)
        loci2 = symbols.generate_loci(file_symbols2)

        genes1 = symbols.read_genes_symbols(file_symbols1)
        genes2 = symbols.read_genes_symbols(file_symbols2)

        d = []
        # Every line must split into exactly two fields; any other field count
        # makes the tuple unpacking below raise ValueError.
        for (g1,g2) in mygenes:
            # Skip pairs with an empty field or with the same symbol on both
            # sides (compared case-insensitively).
            if g1 and g2 and g1.upper() != g2.upper():
                # First try the tables built from 'genes_symbols.txt' ...
                ens1 = symbols.ensembl(g1.upper(),genes1,loci1)
                ens2 = symbols.ensembl(g2.upper(),genes1,loci1)
                # ... and fall back to the 'synonyms.txt' tables for any gene
                # whose first lookup came back empty.
                if not ens1:
                    ens1 = symbols.ensembl(g1.upper(),genes2,loci2)
                if not ens2:
                    ens2 = symbols.ensembl(g2.upper(),genes2,loci2)
                # Only pairs where both genes were resolved are combined.
                if ens1 and ens2:
                    # Walk every combination of the two lookup results.
                    for e1 in ens1:
                        for e2 in ens2:
Example #2
0
                        # Keep only pairs whose two names are non-empty and differ.
                        if gg1 and gg2 and gg1 != gg2:
                            # Put the smaller name first so that A/B and B/A
                            # become the same tuple (data is presumably a set,
                            # given the .add() call -- confirm where it is created).
                            (gg1,gg2) = (gg2,gg1) if gg2 < gg1 else (gg1,gg2)
                            data.add((gg1,gg2))

            print " - found",len(data),"fusions"

            # save the version of the database: append one line, stamped with
            # the date held in `today`, to version.txt in the output directory
            txt = ['Non-cancer tissues and cells (Babiceanu et al. Nucl. Acids Res. 2016) database version: %s\n' % (today.strftime("%Y-%m-%d"),)]
            file(os.path.join(options.output_directory,'version.txt'),'a').writelines(txt)

    #
            # read the gene symbols
            file_symbols = os.path.join(options.output_directory,'synonyms.txt')
            loci = symbols.generate_loci(file_symbols)

            genes = symbols.read_genes_symbols(file_symbols)

            # Translate every collected pair through symbols.ensembl(); each
            # combination of two different lookup results is recorded in d.
            d = []
            for (g1,g2) in data:
                # Pairs that are the same symbol up to letter case are ignored.
                if g1.upper() != g2.upper():
                    ens1 = symbols.ensembl(g1.upper(),genes,loci)
                    ens2 = symbols.ensembl(g2.upper(),genes,loci)
                    # Both genes must resolve to at least one result.
                    if ens1 and ens2:
                        for e1 in ens1:
                            for e2 in ens2:
                                if e1 != e2:
                                    d.append([e1,e2])

            # Serialise each pair as a tab-separated line with its two members
            # in sorted order, then drop duplicate lines and sort the result.
            data = ['\t'.join(sorted(line)) + '\n' for line in d]
            data = sorted(set(data))
    print "Reading the input file...", options.input
    mygenes = [
        line.rstrip('\r\n').split('\t')
        for line in file(options.input, 'r').readlines() if line.rstrip('\r\n')
    ]

    data = []
    if mygenes:

        #file_symbols = os.path.join(options.output_directory,'genes_symbols.txt')
        file_symbols = os.path.join(os.path.dirname(options.output),
                                    'synonyms.txt')

        loci = symbols.generate_loci(file_symbols)

        genes = symbols.read_genes_symbols(file_symbols)

        d = []
        for (g1, g2) in mygenes:
            if g1.upper() != g2.upper():
                ens1 = symbols.ensembl(g1.upper(), genes, loci)
                ens2 = symbols.ensembl(g2.upper(), genes, loci)
                if ens1 and ens2:
                    for e1 in ens1:
                        for e2 in ens2:
                            if e1 != e2:
                                d.append([e1, e2])

        data = ['\t'.join(sorted(line)) + '\n' for line in d]
        data = list(set(data))
        data = sorted(data)