예제 #1
0
    def computeFP(self, FPtype):
        """Compute a fingerprint for every chemical that has a SMILES file.

        Each entry of ``self.prSMI`` is reduced to an identifier (the file
        name up to its first dot). When ``<self.prSMI><identifier>.smi``
        exists, its first line is read as the SMILES string, a
        ``chemical.chemical`` is prepared in the clean-SMILES folder and its
        fingerprint is computed.

        Args:
            FPtype: forwarded unchanged to ``chem.computeFP``.

        Side effects:
            * creates ``<self.prDesc>SMIclean/`` through
              ``pathFolder.createFolder`` and stores the path in
              ``self.prSMIclean``;
            * prints one progress line (identifier, rank, total) per entry;
            * stores ``{identifier: chem.FP}`` in ``self.dFP``. Chemicals for
              which ``chem.computeFP`` returns 1 are reported with
              "ERROR FP" and left out.

        Raises:
            IndexError: when a ``.smi`` file is empty (no first line).
        """
        # set SMI after cleaning
        prSMIclean = self.prDesc + "SMIclean/"
        pathFolder.createFolder(prSMIclean)
        self.prSMIclean = prSMIclean

        # List the folder once; it used to be listed again on every
        # iteration just to print the total.
        lSMI = listdir(self.prSMI)
        nbSMI = len(lSMI)

        dFP = {}
        for i, pSMI in enumerate(lSMI, 1):
            cas = pSMI.split("/")[-1].split(".")[0]
            # One pre-formatted argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s %s" % (cas, i, nbSMI))

            psmiles = self.prSMI + cas + ".smi"
            if not path.exists(psmiles):
                continue

            # The context manager closes the file even if reading fails.
            with open(psmiles, "r") as fsmiles:
                smiles = fsmiles.readlines()[0].strip()

            # chemical
            chem = chemical.chemical(cas, smiles)
            chem.prepareChem(prSMIclean)
            if chem.computeFP(FPtype) == 1:
                print("ERROR FP")
                continue
            dFP[cas] = chem.FP

        self.dFP = dFP
예제 #2
0
파일: search.py 프로젝트: Sighter/remeta
def search_release(term):
	"""Look up a release on the web and return it filled in.

	Spaces in `term` become "+" before the query is handed to
	`chemical.chemical().search`. Without any hit, None is returned. With
	several hits they are listed on stdout and the user types the index of
	the one to keep. The kept hit's fields are stored on a fresh `release`
	(`infopage` from index 0, `shortinfo` from index 1) and the value that
	`getReleaseInfo` returns for that release is handed back.
	"""
	fn_name = "search_release"

	query = term.replace(" ", "+")

	ePrint(1, fn_name, "search the web")

	# run the query
	finder = chemical.chemical()
	hits = finder.search(query)

	# no hit at all
	if len(hits) == 0:
		ePrint(1, fn_name, "nothing found on chemical")
		return None

	# several hits: show a numbered menu and keep the one the user picks
	if len(hits) > 1:
		ePrint(1, fn_name, "Multiple Links found. Make a Choice\n")
		for idx, hit in enumerate(hits):
			print("{0:3d}:\t{1}".format(idx, hit[1]))
		print("\n")
		picked = int(input(" <-- "))
		hits = [[hits[picked][0], hits[picked][1]]]

	first = hits[0]

	# build the release and let the backend complete it
	found = release()
	found.shortinfo = first[1]
	found.infopage = first[0]
	return finder.getReleaseInfo(found)
예제 #3
0
    def computeDesc(self, opera=0, RDkitPhysico=1, pOperaDesc=""):
        """Build (or reuse) the tab-separated 1D/2D descriptor table.

        The table lives at ``<self.prDesc>tableDesc1D2D``, suffixed with
        "Opera" when ``opera == 1`` and with "NoRDKITPhyChem" when
        ``RDkitPhysico == 0``. If that file already exists and is larger than
        100 bytes it is reused as is; otherwise it is rebuilt from every
        ``<identifier>.smi`` file found through ``self.prSMI``.

        Args:
            opera: 0 writes only the "1D2D" descriptor columns; any other
                value adds the "Opera" columns and loads ``pOperaDesc``.
                Per-chemical OPERA values are loaded only when it is
                exactly 1.
            RDkitPhysico: forwarded to ``chemical.getLdesc``; 0 also changes
                the table file name.
            pOperaDesc: forwarded to ``loadAllOperaDesc`` when ``opera`` is
                not 0.

        Returns:
            The path of the descriptor table. Rebuilding the table used to
            return None while the cached path returned the file path; both
            paths now return it.

        Side effects:
            Sets ``self.pdesc1D2D``, ``self.prSMIclean`` and
            ``self.prDescByCAS``; creates the "SMIclean/" and "DESCbyCAS/"
            folders under ``self.prDesc``; when rebuilding, truncates
            ``<self.prDesc>log.log`` and reads ``self.prlog``.

        Raises:
            IndexError: when a ``.smi`` file is empty (no first line).
        """
        pdesc1D2D = self.prDesc + "tableDesc1D2D"
        if opera == 1:
            pdesc1D2D = pdesc1D2D + "Opera"
        if RDkitPhysico == 0:
            pdesc1D2D = pdesc1D2D + "NoRDKITPhyChem"
        self.pdesc1D2D = pdesc1D2D

        prSMIclean = self.prDesc + "SMIclean/"
        pathFolder.createFolder(prSMIclean)
        self.prSMIclean = prSMIclean

        prDescbyCAS = self.prDesc + "DESCbyCAS/"
        pathFolder.createFolder(prDescbyCAS)
        self.prDescByCAS = prDescbyCAS

        # NOTE(review): a run interrupted after more than 100 bytes were
        # written leaves a partial table that this test accepts as complete.
        if path.exists(pdesc1D2D) and path.getsize(pdesc1D2D) > 100:
            return pdesc1D2D

        # One pre-formatted argument prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print("%s No found" % pdesc1D2D)

        # Resolve the column list before any file is opened so a failure
        # here does not leave an empty table behind.
        ldesc = chemical.getLdesc("1D2D", RDkitPhysico)
        doperaDesc = None
        if opera != 0:
            ldesc = ldesc + chemical.getLdesc("Opera", RDkitPhysico)
            doperaDesc = loadAllOperaDesc(pOperaDesc)

        plog = self.prDesc + "log.log"
        # Both files are closed even when a chemical raises mid-loop.
        with open(plog, "w") as flog, open(pdesc1D2D, "w") as fdesc1D2D:
            fdesc1D2D.write("CAS\t" + "\t".join(ldesc) + "\n")

            for pSMI in listdir(self.prSMI):
                cas = pSMI.split("/")[-1].split(".")[0]

                psmiles = self.prSMI + cas + ".smi"
                if not path.exists(psmiles):
                    continue

                with open(psmiles, "r") as fsmiles:
                    smiles = fsmiles.readlines()[0].strip()

                # chemical
                chem = chemical.chemical(cas, smiles)
                chem.prepareChem(prSMIclean)
                chem.compute1D2DDesc(prDescbyCAS)
                if opera == 1:
                    chem.loadOperaDesc(doperaDesc, flog)
                err = chem.writeTablesDescCAS(prDescbyCAS)
                if err == 1:
                    chem.writelog(self.prlog)
                # Write in the table
                chem.writeDesc(ldesc, fdesc1D2D)

        return pdesc1D2D
예제 #4
0
파일: search.py 프로젝트: Sighter/remeta
def _prompt_index(labels):
	"""Print `labels` as a numbered menu and return the index the user types.

	Raises whatever `int()` raises when the typed text is not an integer.
	"""
	for k, label in enumerate(labels):
		print("{0:3d}:\t{1}".format(k, label))
	print("\n")
	return int(input(" <-- "))


def search_clever(tr):
	"""Find the track `tr`, first in the cached releases, then on the web.

	Every release in `cache.rels_found` is asked for the track through
	`search_track`; the first release that yields a match ends the search.
	Otherwise a web query is built from `tr.artist` and `tr.title` ("&", "("
	and ")" dropped, spaces turned into "+"), the release that is found is
	appended to `cache.rels_found` and searched for the track.

	Whenever several candidates exist (links or tracks) they are listed on
	stdout and the user types the index of the one to keep.

	Returns:
		The matching track entry, or None when no web query can be built
		(artist or title empty), when the web search finds nothing, or when
		the release that was found does not contain the track.
	"""
	sFktname = "search_clever"

	# build search-term; stays None when artist or title is missing so the
	# web search below can bail out instead of hitting an unbound name
	term = None
	if tr.artist != "" and tr.title != "":
		term = tr.artist + "+" + tr.title
		for unwanted in ("&", "(", ")"):
			term = term.replace(unwanted, "")
		term = term.replace(" ", "+")

	# first try to find the track in the release cache
	for rel in cache.rels_found:

		ePrint(1, sFktname, "search in cache")

		# search track in release
		res = rel.search_track(tr)

		# nothing found (None, or an empty match list)
		if not res:
			continue

		# more than one item found
		if len(res) > 1:
			ePrint(1, sFktname, "Multiple tracks found in release, make a choice")
			return res[_prompt_index(res)]

		return res[0]

	if term is None:
		ePrint(1, sFktname, "artist or title missing, cannot search the web")
		return None

	ePrint(1, sFktname, "search the web")

	# search on chemical
	query_chemical = chemical.chemical()
	results = query_chemical.search(term)

	# case if nothing was found
	if len(results) == 0:
		ePrint(1, sFktname, "nothing found on chemical")
		return None

	# case, if more than one was found
	if len(results) > 1:
		ePrint(1, sFktname, "Multiple Links found. Make a Choice\n")
		choice = _prompt_index([entry[1] for entry in results])
		results = [[results[choice][0], results[choice][1]]]

	# create a release instance and feed it
	rel = release()
	rel.shortinfo = results[0][1]
	rel.infopage = results[0][0]
	rel = query_chemical.getReleaseInfo(rel)

	# append the release to the cache
	cache.rels_found.append(rel)

	# search track in release
	res = rel.search_track(tr)

	# nothing found (None, or an empty match list)
	if not res:
		ePrint(1, sFktname, "Track not found in release page")
		return None

	# more than one item found
	if len(res) > 1:
		ePrint(1, sFktname, "Multible tracks found in release, make a choice")
		# Return the chosen track itself, exactly like the cache path does;
		# indexing it again with [0] handed back its first element instead.
		return res[_prompt_index(res)]

	return res[0]
예제 #5
0
def _computeChemDesc(chemID, dinfo, PRDESC, PRSMI, update):
    """Run the descriptor pipeline for one chemical.

    Returns True when every step completed without "error" showing up in the
    chemical's log, False when the chemical has to be dropped.
    """
    smiles = dinfo["SMILES"]
    # An "error" marker in the SMILES field (any letter case) or an
    # inconclusive assay call excludes the chemical before any computation.
    if search("error", smiles.lower()) or dinfo["Active"] == "Inconclusive":
        return False

    cchem = chemical.chemical(chemID, smiles)
    cchem.prepareChem(PRSMI)
    if search("error", cchem.log.lower()):
        return False

    cchem.compute1D2DDesc(PRDESC)
    if search("error", cchem.log.lower()):
        return False

    cchem.computeOpera(update=update)
    if search("error", cchem.log.lower()):
        return False

    cchem.writeTablesDesc(PRDESC, update=update)
    return True


def _iterDescRows(lchemID, dchem, ldesc, PRDESC):
    """Yield (chemID, aff, csv_row) for each chemical with a descriptor file.

    ``aff`` is 1 when the chemical is flagged "Active" and 0 otherwise;
    descriptors missing from the file are written as "NA".
    """
    for chemID in lchemID:
        print(chemID)
        aff = 1 if dchem[chemID]["Active"] == "Active" else 0
        pdesc = PRDESC + chemID + ".txt"
        if not path.exists(pdesc):
            continue
        dval = toolbox.loadMatrix(pdesc)[chemID]
        lval = [str(dval[desc]) if desc in dval else "NA" for desc in ldesc]
        yield chemID, aff, chemID + "," + ",".join(lval) + "," + str(aff) + "\n"


def computeDesc(passay, PRDESC, PRSMI, prout, nbfile=1, update=0):
    """Compute descriptors for the chemicals of an assay and write them out.

    Args:
        passay: path of the assay table, read with ``toolbox.loadMatrix``;
            every entry is expected to carry "SMILES" and "Active" fields.
        PRDESC: folder prefix where per-chemical descriptor files
            (``<PRDESC><ID>.txt``) are written and read back.
        PRSMI: folder handed to ``prepareChem``.
        prout: output prefix; the matrix goes to ``<prout>descMat`` and the
            activity table to ``<prout>aff.txt``.
        nbfile: 1 writes a single matrix with a trailing "Aff" column; any
            other value writes the matrix plus the separate activity table.
        update: 0 allows existing outputs to be reused; also forwarded to
            ``computeOpera`` and ``writeTablesDesc``.

    Returns:
        ``<prout>descMat`` when ``nbfile == 1``, otherwise the list
        ``[<prout>descMat, <prout>aff.txt]``.

    Chemicals are processed in random order. Those with an "error" marker in
    their SMILES, an "Inconclusive" activity, or an "error" in their log after
    any pipeline step are left out.
    """
    # by pass: reuse what an earlier run produced
    pdescout = prout + "descMat"
    paff = prout + "aff.txt"
    if update == 0 and path.exists(pdescout):
        if nbfile == 1:
            return pdescout
        if nbfile == 2 and path.exists(paff):
            return [pdescout, paff]

    dchem = toolbox.loadMatrix(passay)
    # list() gives a mutable copy of the keys on Python 2 and Python 3 alike
    lchemID = list(dchem.keys())
    if "RESULT_UNIT" in lchemID:
        lchemID.remove("RESULT_UNIT")
    shuffle(lchemID)

    # Keep only the chemicals whose pipeline succeeded. The index-based loop
    # this replaces stepped back one position after a failed computeOpera and
    # so re-processed the previous chemical (or the last one at position 0).
    lchemID = [
        chemID for chemID in lchemID
        if _computeChemDesc(chemID, dchem[chemID], PRDESC, PRSMI, update)
    ]

    ldesc = chemical.getLdesc("1D2D", 1) + chemical.getLdesc("Opera", 0)
    rows = _iterDescRows(lchemID, dchem, ldesc, PRDESC)

    if nbfile == 1:
        with open(pdescout, "w") as fildesc:
            fildesc.write("ID," + ",".join(ldesc) + ",Aff" + "\n")
            for chemID, aff, row in rows:
                fildesc.write(row)
        return pdescout

    with open(pdescout, "w") as fildesc, open(paff, "w") as filaff:
        # NOTE(review): this header has no "Aff" column although every row
        # below still ends with the activity value; kept as is because
        # downstream readers may rely on it -- confirm.
        fildesc.write("ID," + ",".join(ldesc) + "\n")
        filaff.write("ID\tAff\n")
        for chemID, aff, row in rows:
            fildesc.write(row)
            filaff.write(chemID + "\t" + str(aff) + "\n")

    return [pdescout, paff]
예제 #6
0
    def predictSMI(self, nameChemical, smiles, plot=0, verbose=0):
        """Predict the enrichment of one chemical for every cluster set.

        Results are cached: when ``<result folder>pred`` already exists it is
        loaded with ``toolbox.loadMatrix`` and returned right away (nothing is
        recomputed and ``plot`` is ignored).

        For every channel / cell / descriptor type of ``self.dcluster``:

        * types whose name contains "Desc" are delegated to
          ``runExternalSoft.findCluster``; the name is split on "-" and its
          second and third parts are passed as distance and aggregation
          methods;
        * all other types are treated as fingerprints: the name's first part
          selects the fingerprint, its last part (up to the first "_") the
          similarity metric. The database chemical most similar to the query
          is looked up in ``self.ChemClust`` and the 'Enrichment' of its
          cluster is used.

        Args:
            nameChemical: name of the chemical; also the name of the result
                folder created under ``self.prout``.
            smiles: SMILES string handed to ``chemical.chemical``.
            plot: 1 calls ``self.writeResultBySMI`` with the predictions.
            verbose: 1 prints intermediate values.

        Returns:
            dict mapping "<cell>_<channel>" to ``{typeDesc: enrichment}``.

        Raises:
            ValueError: from ``max`` when a fingerprint type is evaluated and
                ``self.cDB.dFP`` is empty.
        """
        prresult = pathFolder.createFolder(self.prout + nameChemical + "/")

        ppred = prresult + "pred"
        if path.exists(ppred):
            return toolbox.loadMatrix(ppred)

        chem = chemical.chemical(nameChemical, smiles)
        chem.prepareChem(prresult)
        chem.compute1D2DDesc(prresult)
        chem.writeTablesDescCAS(prresult)
        chem.computeFP(typeFP="All")

        dpred = {}
        for channel in self.dcluster:
            for cell in self.dcluster[channel]:
                kpred = str(cell) + "_" + str(channel)
                dpred[kpred] = {}
                dtypeDesc = self.dcluster[channel][cell]
                for typeDesc in dtypeDesc:
                    if verbose == 1:
                        # Pre-formatted single arguments print the same text
                        # on Python 2 and Python 3.
                        print("%s %s %s" % (channel, cell, typeDesc))
                        print(list(dtypeDesc.keys()))

                    lpart = typeDesc.split("-")
                    if search("Desc", typeDesc):
                        distMeth = lpart[1]
                        aggMeth = lpart[2]

                        enrichment = runExternalSoft.findCluster(
                            self.cDB.pdesc1D2Dclean, chem.pdesc,
                            dtypeDesc[typeDesc]["files"][0],
                            dtypeDesc[typeDesc]["files"][1],
                            distMeth, aggMeth)

                    else:
                        # generate FP
                        typeFP = lpart[0]
                        metricAgg = lpart[-1]
                        metric = metricAgg.split("_")[0]
                        if verbose == 1:
                            print("%s %s" % (typeFP, metric))

                        dFP = {}
                        for CASID in self.cDB.dFP:
                            if verbose == 1:
                                print(self.cDB.dFP[CASID])
                                print(chem.FP[typeFP])
                                print(metric)
                            dFP[CASID] = float(
                                toolbox.computeSimilarityFP(
                                    self.cDB.dFP[CASID][typeFP],
                                    chem.FP[typeFP], metric))

                        # Closest database chemical. On ties the last key in
                        # iteration order wins, as before. The comparison is
                        # now written as intended instead of float(a == b),
                        # and no longer indexes keys(), which fails on
                        # Python 3.
                        maxSim = max(dFP.values())
                        for CASID, sim in dFP.items():
                            if sim == maxSim:
                                CASclose = CASID

                        if verbose == 1:
                            print(CASclose)
                            print("%s %s" % (channel, cell))
                            print(self.ChemClust[CASclose][channel][cell])

                        clusterfound = self.ChemClust[CASclose][channel][cell][
                            str(typeFP) + "-" + str(metricAgg)]
                        enrichment = dtypeDesc[typeDesc][clusterfound][
                            'Enrichment']
                    dpred[kpred][typeDesc] = enrichment

        if plot == 1:
            self.writeResultBySMI(dpred, prresult)

        return dpred