# # print "Reading the input file...", options.input mygenes = [line.rstrip('\r\n').split('\t') for line in file(options.input,'r').readlines() if line.rstrip('\r\n')] data = [] if mygenes: file_symbols1 = os.path.join(os.path.dirname(options.output),'genes_symbols.txt') file_symbols2 = os.path.join(os.path.dirname(options.output),'synonyms.txt') loci1 = symbols.generate_loci(file_symbols1) loci2 = symbols.generate_loci(file_symbols2) genes1 = symbols.read_genes_symbols(file_symbols1) genes2 = symbols.read_genes_symbols(file_symbols2) d = [] for (g1,g2) in mygenes: if g1 and g2 and g1.upper() != g2.upper(): ens1 = symbols.ensembl(g1.upper(),genes1,loci1) ens2 = symbols.ensembl(g2.upper(),genes1,loci1) if not ens1: ens1 = symbols.ensembl(g1.upper(),genes2,loci2) if not ens2: ens2 = symbols.ensembl(g2.upper(),genes2,loci2) if ens1 and ens2: for e1 in ens1: for e2 in ens2:
if gg1 and gg2 and gg1 != gg2: (gg1,gg2) = (gg2,gg1) if gg2 < gg1 else (gg1,gg2) data.add((gg1,gg2)) print " - found",len(data),"fusions" # save version of txt = ['Non-cancer tissues and cells (Babiceanu et al. Nucl. Acids Res. 2016) database version: %s\n' % (today.strftime("%Y-%m-%d"),)] file(os.path.join(options.output_directory,'version.txt'),'a').writelines(txt) # # read the gene symbols file_symbols = os.path.join(options.output_directory,'synonyms.txt') loci = symbols.generate_loci(file_symbols) genes = symbols.read_genes_symbols(file_symbols) d = [] for (g1,g2) in data: if g1.upper() != g2.upper(): ens1 = symbols.ensembl(g1.upper(),genes,loci) ens2 = symbols.ensembl(g2.upper(),genes,loci) if ens1 and ens2: for e1 in ens1: for e2 in ens2: if e1 != e2: d.append([e1,e2]) data = ['\t'.join(sorted(line)) + '\n' for line in d] data = sorted(set(data))
# Read the user-supplied list of gene pairs and turn it into `data`: a sorted
# list of unique "ID1<TAB>ID2<NEWLINE>" strings.
#
# Input : options.input  - text file, one "SYMBOL1<TAB>SYMBOL2" pair per line;
#                          empty lines are ignored.
#         options.output - only its directory is used, to locate 'synonyms.txt'.
# Output: data           - sorted list of unique lines; within each line the two
#                          ids are in sorted order. Stays [] if the input has
#                          no usable lines.
# Raises: ValueError if a non-empty input line does not have exactly two
#         tab-separated columns (the tuple unpacking below fails).
# NOTE(review): the ids returned by symbols.ensembl() are presumably Ensembl
# gene ids - confirm against the `symbols` module.

# A single parenthesised argument prints the same text under the Python 2
# print statement and the Python 3 print function.
print("%s %s" % ("Reading the input file...", options.input))

# Context manager: the input file is closed as soon as it has been read.
with open(options.input, 'r') as handle:
    stripped = (line.rstrip('\r\n') for line in handle)
    mygenes = [line.split('\t') for line in stripped if line]

data = []
if mygenes:
    # The synonyms table is looked up in the directory of the output file.
    file_symbols = os.path.join(os.path.dirname(options.output), 'synonyms.txt')
    loci = symbols.generate_loci(file_symbols)
    genes = symbols.read_genes_symbols(file_symbols)

    d = []
    for (g1, g2) in mygenes:
        # Skip pairs with a missing symbol and pairs of a gene with itself.
        if not (g1 and g2) or g1.upper() == g2.upper():
            continue
        ens1 = symbols.ensembl(g1.upper(), genes, loci)
        ens2 = symbols.ensembl(g2.upper(), genes, loci)
        if not (ens1 and ens2):
            # At least one of the symbols could not be resolved.
            continue
        # Each lookup may yield several ids: keep every cross combination
        # whose two ids differ.
        for e1 in ens1:
            for e2 in ens2:
                if e1 != e2:
                    d.append([e1, e2])

    # Order the ids inside each pair so that (A, B) and (B, A) collapse into
    # one line, then de-duplicate and sort in a single step.
    data = sorted(set('\t'.join(sorted(pair)) + '\n' for pair in d))