db="TESTING")

cursor = conn_obj.cursor()

cursor.execute("SELECT NAME FROM CSA_catalyticCofactors")

print cursor.fetchall()

cursor.execute("SHOW TABLES FROM TESTING")
for table in cursor.fetchall():
    print list(table)

cursor.execute("SHOW FIELDS FROM CSA_catalyticCofactors")
print cursor.fetchall()

cursor.execute(
    "SELECT column_name FROM information_schema.columns WHERE table_name='CSA_catalyticCofactors'"
)
print cursor.fetchall()

cursor.execute(
    "SELECT PDBID, CHAIN,NUMBER, UPNUMBER , TYPE FROM CSA_CATALYTICRESIDUES")
RESULTS = [list(X) for X in cursor.fetchall()]
print len(RESULTS)
RESULTS = unique(RESULTS)
print RESULTS
print len(RESULTS)

RESULTS_NULL = [X for X in RESULTS if X[3] == None]
print len(RESULTS_NULL), RESULTS_NULL
#CANCER_NETWORK_MAPPING_2 - DONE IN NETWORK WITH PRE-JULY DATA
#Check how many genes are actually present in network

import pickle
from FUNCTIONS import UNIPROT_TO_GENES_CLASS, unique

#Load CANCER_to_Gene dict
FILE_IN1=open("DATABASES/OBJECTS/DICT_CANCER_TO_GENES.pi")
CANCER_DICT=pickle.load(FILE_IN1)
FILE_IN1.close()

#Get all cancer genes
ALL_CANCER_GENES=[]
for genes in CANCER_DICT.itervalues():
    ALL_CANCER_GENES=ALL_CANCER_GENES+genes
ALL_CANCER_GENES=unique(ALL_CANCER_GENES)
print len(ALL_CANCER_GENES)

#Load NETWORK UNIPROTS
FILE_IN2=open("NETWORK/ENDOGENOUS_UNIPROT")
UNIPROTS=FILE_IN2.read().splitlines()

#Run class
UNIPROT_CLASS=UNIPROT_TO_GENES_CLASS(UNIPROTS)
NETWORK_GENES=UNIPROT_CLASS.get_genes()
NETWORK_UNIPROT_GENES=UNIPROT_CLASS.uniprot_gene()
print "uniprot in network has match", len(UNIPROT_CLASS.has_genes_uniprots())
print "total cancer genes in network", len([X for X in ALL_CANCER_GENES if X in NETWORK_GENES])

#Check per cancer and make UNIPROT_DICT
CANCER_TO_UNIPROT_DICT=dict((X, []) for X in CANCER_DICT.iterkeys())
# Example no. 3 / 0 -- snippet separator, not part of the code
METABOLITES_ZERO=[]
for metabolite in METABO_EN.nodes():
    if METABO_EN.in_degree(metabolite)==0 and METABO_EN.out_degree(metabolite)>0:
        METABOLITES.append(metabolite)
    elif METABO_EN.in_degree(metabolite)==0 and METABO_EN.out_degree(metabolite)==0:
        METABOLITES_ZERO.append(metabolite)
print "METABOLITES:", len (METABOLITES)
print len (METABOLITES) + len (METABOLITES_ZERO)
print METABOLITES[:20]

#Get proteins only
PROTEINS=[]
for protein in METABO_EN.nodes():
    if METABO_EN.in_degree(protein)>0:
        PROTEINS.append(protein)
print "PROTEINS:",len (PROTEINS)
print "UNIQUE PROTEINS:", len (unique(PROTEINS))
print PROTEINS[:5]

#DRAW NETWORK
plt.figure(figsize=(25,12))
pos=NX.spectral_layout(METABO_EN)

NX.draw_networkx_nodes(METABO_EN, pos, nodelist=METABOLITES, node_color="r", node_size=100)
#NX.draw_networkx_nodes(METABO_EN, pos, nodelist=METABOLITES_ZERO, node_color="b", node_size=100) #EXCLUDE ZEROES ALTOGETHER
                                                                                                    #FOR NOW
NX.draw_networkx_nodes(METABO_EN, pos, nodelist=PROTEINS, node_color="g", node_size=10)
NX.draw_networkx_edges(METABO_EN, pos, width=0.2)

plt.show()
#Separate cancer genes by cancer type

import subprocess, pickle
from FUNCTIONS import unique

#Get genes in stats
GENES_STATS=subprocess.check_output("ls", cwd="DATABASES/CANCER_DATA/geneStats").splitlines()

#CANCER TYPES
CANCER_DICT={"LUSC":[], "READ":[], "GBM":[], "KIRC":[], "UCEC":[], "OV":[], "BRCA":[], "COAD":[]}

#Assign to CANCER TYPES
for gene in GENES_STATS:
    FILE_IN=open("DATABASES/CANCER_DATA/geneStats/%s"%gene)
    RECORDS=FILE_IN.read().splitlines()
    FILE_IN.close()
    
    for record in RECORDS:
        CANCER_DICT[record.split()[4].strip()]=CANCER_DICT[record.split()[4].strip()]+[gene.split(".")[0]]

for cancer in CANCER_DICT.iterkeys():
    CANCER_DICT[cancer]=unique(CANCER_DICT[cancer])
    print cancer, CANCER_DICT[cancer]

#Store as object
PICKLE_OUT=open("DATABASES/OBJECTS/DICT_CANCER_TO_GENES.pi", "w")
pickle.dump(CANCER_DICT, PICKLE_OUT)

for cancer in CANCER_DICT.iterkeys():
    print cancer, len(CANCER_DICT[cancer])
    
# Example no. 5 / 0 -- snippet separator, not part of the code
            metabolite) == 0 and METABO_EN.out_degree(metabolite) > 0:
        METABOLITES.append(metabolite)
    elif METABO_EN.in_degree(metabolite) == 0 and METABO_EN.out_degree(
            metabolite) == 0:
        METABOLITES_ZERO.append(metabolite)
print "METABOLITES:", len(METABOLITES)
print len(METABOLITES) + len(METABOLITES_ZERO)
print METABOLITES[:20]

#Get proteins only
PROTEINS = []
for protein in METABO_EN.nodes():
    if METABO_EN.in_degree(protein) > 0:
        PROTEINS.append(protein)
print "PROTEINS:", len(PROTEINS)
print "UNIQUE PROTEINS:", len(unique(PROTEINS))
print PROTEINS[:5]

#DRAW NETWORK
plt.figure(figsize=(25, 12))
pos = NX.spectral_layout(METABO_EN)

NX.draw_networkx_nodes(METABO_EN,
                       pos,
                       nodelist=METABOLITES,
                       node_color="r",
                       node_size=100)
#NX.draw_networkx_nodes(METABO_EN, pos, nodelist=METABOLITES_ZERO, node_color="b", node_size=100) #EXCLUDE ZEROES ALTOGETHER
#FOR NOW
NX.draw_networkx_nodes(METABO_EN,
                       pos,
UNIPROT_GENE_DICT = {}
for cancer in CANCER_DICT.itervalues():
    for record in cancer:
        UNIPROT_GENE_DICT[record.split("|")[0]] = record.split("|")[1]

#BREAK DOWN NODES BY CANCER
OV_PROTEINS = [
    X for X in PROTEINS
    if X.split("|")[1] in [Y.split("|")[0] for Y in CANCER_DICT["OV"]]
]
print "OV_PROTEINS", len(OV_PROTEINS)

OV_METABOLITES = []
for protein in OV_PROTEINS:
    OV_METABOLITES = OV_METABOLITES + METABO_EN.neighbors(protein)
OV_METABOLITES = unique(OV_METABOLITES)
print "OV_METABOLITES", len(OV_METABOLITES)

OV_EDGES = METABO_EN.edges(OV_PROTEINS)
OV_EDGES = unique(OV_EDGES)

OV_LABELS_PROTEINS = {}
for protein in OV_PROTEINS:
    OV_LABELS_PROTEINS[protein] = UNIPROT_GENE_DICT[protein.split("|")[1]]

OV_LABELS_METABOLITES = dict(
    (X[0], X[1]) for X in zip(OV_METABOLITES, OV_METABOLITES))

#DRAW NETWORK
plt.figure(figsize=(25, 12))
pos = NX.spring_layout(METABO_EN)
#GET ALL ENDOGENOUS UNIPROTS FROM NETWORK
from FUNCTIONS import unique

FILE_IN=open("NETWORK/ENDOGENOUS.ADJ").read().splitlines()

ENDOGENOUS_UNIPROT=[]
for record in FILE_IN:
    if len(record.split())>1:
        ENDOGENOUS_UNIPROT=ENDOGENOUS_UNIPROT+[X.split("|")[1].strip() for X in record.split()[1:]]
print len(ENDOGENOUS_UNIPROT), ENDOGENOUS_UNIPROT
ENDOGENOUS_UNIPROT=unique(ENDOGENOUS_UNIPROT)
print len(ENDOGENOUS_UNIPROT)


FILE_OUT=open("NETWORK/ENDOGENOUS_UNIPROT", "w")
for record in ENDOGENOUS_UNIPROT:
    FILE_OUT.write(record+"\n")
FILE_OUT.close()
#Create connection object that represents database
conn_obj=MQ.connect(host="localhost",user="******", passwd="mysql", db="TESTING")

cursor=conn_obj.cursor()

cursor.execute("SELECT NAME FROM CSA_catalyticCofactors")

print cursor.fetchall()

cursor.execute("SHOW TABLES FROM TESTING")
for table in cursor.fetchall():
    print list(table)
    
cursor.execute("SHOW FIELDS FROM CSA_catalyticCofactors")
print cursor.fetchall()

cursor.execute("SELECT column_name FROM information_schema.columns WHERE table_name='CSA_catalyticCofactors'")
print cursor.fetchall()

cursor.execute("SELECT PDBID, CHAIN,NUMBER, UPNUMBER , TYPE FROM CSA_CATALYTICRESIDUES")
RESULTS= [list(X) for X in cursor.fetchall()]
print len(RESULTS)
RESULTS=unique(RESULTS)
print RESULTS
print len(RESULTS)

RESULTS_NULL=[X for X in RESULTS if X[3]==None]
print len(RESULTS_NULL), RESULTS_NULL

#GET ALL ENDOGENOUS UNIPROTS FROM NETWORK
from FUNCTIONS import unique

FILE_IN = open("NETWORK/ENDOGENOUS.ADJ").read().splitlines()

ENDOGENOUS_UNIPROT = []
for record in FILE_IN:
    if len(record.split()) > 1:
        ENDOGENOUS_UNIPROT = ENDOGENOUS_UNIPROT + [
            X.split("|")[1].strip() for X in record.split()[1:]
        ]
print len(ENDOGENOUS_UNIPROT), ENDOGENOUS_UNIPROT
ENDOGENOUS_UNIPROT = unique(ENDOGENOUS_UNIPROT)
print len(ENDOGENOUS_UNIPROT)

FILE_OUT = open("NETWORK/ENDOGENOUS_UNIPROT", "w")
for record in ENDOGENOUS_UNIPROT:
    FILE_OUT.write(record + "\n")
FILE_OUT.close()
TO_R2=open("/Users/jzamalloa/Desktop/FOLDER/LAB/ECLIPSE_TO_R/HUBS_90_DEGREE", "w")
HUBS_90_DEGREE=[]
HUBS_90_PROTEINS=[]
for metabolite in CLEAN_TC_LIST:
    HUBS_90_DEGREE.append(METABO_NT.out_degree(metabolite))
    TO_R2.write(str(METABO_NT.out_degree(metabolite))+"\n")
    PROTEINS=METABO_NT.neighbors(metabolite)
    for prot in PROTEINS:
        HUBS_90_PROTEINS.append(prot.split("|")[1])
TO_R2.close()

print len(HUBS_90_DEGREE)
print len(HUBS_90_PROTEINS)
print HUBS_90_PROTEINS[:5]
HUBS_90_PROTEINS=unique(HUBS_90_PROTEINS)
print len(HUBS_90_PROTEINS)

#Plot degree distribution
plt.hist(HUBS_90_DEGREE)
plt.show()

#Get gene names out of these proteins, use uniprot_sprot.dat
HUBS_90_GENES=[]
for protein in HUBS_90_PROTEINS:
    PROT_GENE=UNIPROT_GENE_PLUS(protein)
    for gene in PROT_GENE:
        HUBS_90_GENES.append(protein+"$"+gene)
    
print HUBS_90_GENES[0:5]
print len(HUBS_90_GENES)
CANCER_DICT=pickle.load(FILE_IN1)

#Make {UNIPROT:GENE} dictionary
UNIPROT_GENE_DICT={}
for cancer in CANCER_DICT.itervalues():
    for record in cancer:
        UNIPROT_GENE_DICT[record.split("|")[0]]=record.split("|")[1]

#BREAK DOWN NODES BY CANCER
OV_PROTEINS=[X for X in PROTEINS if X.split("|")[1] in [Y.split("|")[0] for Y in CANCER_DICT["OV"]]]
print "OV_PROTEINS", len(OV_PROTEINS)        

OV_METABOLITES=[]
for protein in OV_PROTEINS:
    OV_METABOLITES=OV_METABOLITES+METABO_EN.neighbors(protein)
OV_METABOLITES=unique(OV_METABOLITES)
print "OV_METABOLITES", len(OV_METABOLITES)

OV_EDGES=METABO_EN.edges(OV_PROTEINS)
OV_EDGES=unique(OV_EDGES)

OV_LABELS_PROTEINS={}
for protein in OV_PROTEINS:
    OV_LABELS_PROTEINS[protein]=UNIPROT_GENE_DICT[protein.split("|")[1]]

OV_LABELS_METABOLITES=dict((X[0],X[1]) for X in zip(OV_METABOLITES, OV_METABOLITES))

#DRAW NETWORK
plt.figure(figsize=(25,12))
pos=NX.spring_layout(METABO_EN)
#CANCER_NETWORK_MAPPING_2 - DONE IN NETWORK WITH PRE-JULY DATA
#Check how many genes are actually present in network

import pickle
from FUNCTIONS import UNIPROT_TO_GENES_CLASS, unique

#Load CANCER_to_Gene dict
FILE_IN1 = open("DATABASES/OBJECTS/DICT_CANCER_TO_GENES.pi")
CANCER_DICT = pickle.load(FILE_IN1)
FILE_IN1.close()

#Get all cancer genes
ALL_CANCER_GENES = []
for genes in CANCER_DICT.itervalues():
    ALL_CANCER_GENES = ALL_CANCER_GENES + genes
ALL_CANCER_GENES = unique(ALL_CANCER_GENES)
print len(ALL_CANCER_GENES)

#Load NETWORK UNIPROTS
FILE_IN2 = open("NETWORK/ENDOGENOUS_UNIPROT")
UNIPROTS = FILE_IN2.read().splitlines()

#Run class
UNIPROT_CLASS = UNIPROT_TO_GENES_CLASS(UNIPROTS)
NETWORK_GENES = UNIPROT_CLASS.get_genes()
NETWORK_UNIPROT_GENES = UNIPROT_CLASS.uniprot_gene()
print "uniprot in network has match", len(UNIPROT_CLASS.has_genes_uniprots())
print "total cancer genes in network", len(
    [X for X in ALL_CANCER_GENES if X in NETWORK_GENES])

#Check per cancer and make UNIPROT_DICT
# Example no. 13 / 0 -- snippet separator, not part of the code
    "LUSC": [],
    "READ": [],
    "GBM": [],
    "KIRC": [],
    "UCEC": [],
    "OV": [],
    "BRCA": [],
    "COAD": []
}

#Assign to CANCER TYPES
for gene in GENES_STATS:
    FILE_IN = open("DATABASES/CANCER_DATA/geneStats/%s" % gene)
    RECORDS = FILE_IN.read().splitlines()
    FILE_IN.close()

    for record in RECORDS:
        CANCER_DICT[record.split()[4].strip(
        )] = CANCER_DICT[record.split()[4].strip()] + [gene.split(".")[0]]

for cancer in CANCER_DICT.iterkeys():
    CANCER_DICT[cancer] = unique(CANCER_DICT[cancer])
    print cancer, CANCER_DICT[cancer]

#Store as object
PICKLE_OUT = open("DATABASES/OBJECTS/DICT_CANCER_TO_GENES.pi", "w")
pickle.dump(CANCER_DICT, PICKLE_OUT)

for cancer in CANCER_DICT.iterkeys():
    print cancer, len(CANCER_DICT[cancer])