Ejemplo n.º 1
0
 def __loadRawFold(self, iFold):
     """Load fold ``iFold`` through ``loadFold`` and cache its parts on ``self``.

     ``loadFold`` is expected to return six items, stored in order as
     ``dDrug``, ``dSe``, ``trains``, ``tests``, ``validates`` and ``dDes``.
     From those this method derives:

     * ``id2Drug`` / ``id2Se``: ``utils.reverse_dict`` of ``dDrug`` / ``dSe``;
     * ``nD`` / ``nSe``: number of entries in ``dDrug`` / ``dSe``;
     * ``currentTrainIdx`` / ``currentTestIdx`` / ``currentValidIdx``:
       cursors, all reset to 0;
     * ``featureSize``: ``shape[0]`` of the first value stored in ``dDes``.

     The feature size is printed once the fold is loaded.
     """
     foldParts = loadFold(iFold)
     (self.dDrug, self.dSe, self.trains, self.tests, self.validates,
      self.dDes) = foldParts

     # Inverse lookups and vocabulary sizes.
     self.id2Drug = utils.reverse_dict(self.dDrug)
     self.id2Se = utils.reverse_dict(self.dSe)
     self.nD = len(self.dDrug)
     self.nSe = len(self.dSe)

     # Rewind every split cursor to the beginning.
     self.currentTrainIdx = self.currentTestIdx = self.currentValidIdx = 0

     # Any entry of dDes tells us the feature length; take the first key.
     # NOTE(review): assumes every value in dDes has the same shape[0].
     firstKey = list(self.dDes.keys())[0]
     self.featureSize = self.dDes[firstKey].shape[0]
     print("Feature size: ", self.featureSize)
Ejemplo n.º 2
0
def runTTest():
    """Fan the raw exposure list out to producer processes and collect results.

    The input returned by ``loadRawExpose()`` is cut into
    ``params.N_DATA_WORKER`` contiguous slices, one per ``producer`` process.
    A single daemon ``consumer`` process receives the shared queue, two shared
    integer counters, the output file ``<OUT_DIR>/ttStatsRe`` and an empty
    list.

    After all producers have been joined, ``None`` is put on the queue and the
    parent waits until ``counter`` becomes non-zero.
    NOTE(review): presumably the consumer sets ``counter`` once it has seen
    the ``None`` sentinel; confirm against ``consumer``.

    Compared with the previous version:

    * the output file is managed by a ``with`` block, so it is closed even if
      starting or joining a process raises;
    * the wait loop also stops when the consumer process is no longer alive.
      ``counter`` is shared only with that process, so once it has died the
      value can never change and waiting would hang forever.
    """
    producers = []
    queue = Queue(params.K_FOLD)
    counter = Value('i', 0)
    counter2 = Value('i', 0)

    dList = utils.load_obj("%s/DataDump.o" % OUT_DIR)
    # NOTE(review): dDrugPair2Id and dInd2Id are never read below. The loads
    # are kept because loadDictName2Id is defined elsewhere and dropping the
    # calls would change which files this function touches.
    dDrugPair2Id, drugPairList = loadDictName2Id("%s/%sPairs.txt" %
                                                 (OUT_DIR, PREF),
                                                 nMax=-1,
                                                 min=1)
    dDrug2Id, _ = loadDictName2Id("%s/%sADrugs.txt" % (OUT_DIR, PREF))
    dInd2Id, _ = loadDictName2Id("%s/%sAInd.txt" % (OUT_DIR, PREF))
    dSe2Id, _ = loadDictName2Id("%s/%sASe.txt" % (OUT_DIR, PREF))
    dId2Se = utils.reverse_dict(dSe2Id)

    inputList = loadRawExpose()
    nInputList = len(inputList)

    # Integer division: each worker gets the same number of items, and the
    # last worker additionally absorbs the remainder.
    nDPerWorker = nInputList // params.N_DATA_WORKER
    lastWorker = params.N_DATA_WORKER - 1
    for i in range(params.N_DATA_WORKER):
        startInd = i * nDPerWorker
        endInd = nInputList if i == lastWorker else startInd + nDPerWorker
        data = (inputList[startInd:endInd], drugPairList, dDrug2Id, dId2Se,
                dList)
        producers.append(Process(target=producer, args=(queue, data)))

    with open("%s/%s" % (OUT_DIR, "ttStatsRe"), "w") as fout:
        consumerProc = Process(target=consumer,
                               args=(queue, counter, counter2, fout, []))
        consumerProc.daemon = True

        print("Start Producers...")
        for proc in producers:
            proc.start()
        print("Start Consumers...")
        consumerProc.start()

        for proc in producers:
            proc.join()
        print("Finish Producers")

        # Sentinel telling the consumer that no more results will arrive.
        queue.put(None)

        # Poll until the consumer signals through `counter`, or until it has
        # exited without signalling (in which case nothing else can).
        while counter.value == 0 and consumerProc.is_alive():
            time.sleep(0.01)
Ejemplo n.º 3
0
def debug(iFold=1, nSamples=10):
    """Print the non-zero input/output entries of the first test samples.

    Builds ``PolySEData(iFold)``, fetches a test batch with
    ``getNextMinibatchTest(-1)`` and, for each of the first ``nSamples`` rows,
    prints the row index, then the comma-joined names mapped from the non-zero
    input positions (via the reverse of ``polySE.dDrug``), then the
    comma-joined names mapped from the non-zero output positions (via the
    reverse of ``polySE.dSe``).

    Args:
        iFold: fold passed to ``PolySEData``. Defaults to 1, the value that
            used to be hard-coded.
        nSamples: maximum number of rows to print. Defaults to 10, the value
            that used to be hard-coded. Fewer rows are printed when the batch
            is smaller, instead of raising ``IndexError``.

    NOTE(review): ``-1`` presumably requests the whole test split; confirm
    against ``PolySEData.getNextMinibatchTest``.
    """
    polySE = PolySEData(iFold)
    matInp, matOut, _ = polySE.getNextMinibatchTest(-1)

    # The id -> name maps do not depend on the row, so build them once.
    dId2Drug = utils.reverse_dict(polySE.dDrug)
    dId2Se = utils.reverse_dict(polySE.dSe)

    nRows = min(nSamples, len(matInp), len(matOut))
    for ii in range(nRows):
        print(ii)
        # Positions of the non-zero entries along the first axis of each row.
        nzd = np.nonzero(matInp[ii])[0]
        nzs = np.nonzero(matOut[ii])[0]

        drugNames = [dId2Drug[i] for i in nzd]
        seNames = [dId2Se[i] for i in nzs]
        print(",".join(drugNames))
        print(",".join(seNames))
Ejemplo n.º 4
0
def _calRatio(dList, ar, nSe, nCount, dOldSeId2NewId, nRound=1000):
    """Resample records and return per-round appear/not-appear ratios.

    For each of ``nRound`` rounds, ``nCount`` ids are drawn from ``ar``
    without replacement, the matching records are fetched with
    ``getSubList(dList, ids)``, and for every side effect known to
    ``dOldSeId2NewId`` the number of sampled records containing it is counted.

    Returns:
        Array of shape ``(nRound, nSe)`` holding
        ``count / (nCount - count + 1e-10)``.
    """
    appears = np.zeros((nRound, nSe))
    for i in range(nRound):
        rIds = np.random.choice(ar, nCount, replace=False)
        for r in getSubList(dList, rIds):
            _, _, seIds = r
            see = []
            for seId in seIds:
                newSeId = utils.get_dict(dOldSeId2NewId, seId, -1)
                if newSeId != -1:
                    see.append(newSeId)
            # Count each record exactly once per side effect it contains.
            # This increment must run after the inner loop: doing it inside
            # re-increments earlier side effects for every later one and lets
            # counts exceed nCount. Fancy-index `+=` bumps a repeated index
            # only once, so duplicates within a record are harmless.
            appears[i, see] += 1

    # The tiny epsilon avoids division by zero when a side effect is present
    # in every sampled record.
    notAppear = nCount - appears + 1e-10
    return appears / notAppear


def producer(queue, datas):
    """Test, per drug pair, which side effects are over-represented when exposed.

    ``datas`` is the tuple ``(oRs, drugPairList, dDrug2Id, dId2Se, dList)``.
    Each element of ``oRs`` is ``(pId, rExposeIds, rNonExposeIds)``. For each
    one:

    1. the side effects occurring in the exposed records are collected and
       given dense column indices;
    2. appear/not-appear ratios are resampled separately from the exposed and
       the non-exposed record ids, using a tenth of each group (at least one
       record) per draw;
    3. a one-sided ``ttest_ind(..., alternative="greater")`` is run per side
       effect, and every result with ``p <= P_THRESHOLD`` is put on ``queue``
       as ``[drugPair, sideEffectName, p]``.

    Elements whose exposed records contain no side effect are skipped. They
    cannot yield any result, and skipping avoids the pointless (or, for an
    empty exposed list, failing) resampling.

    ``dDrug2Id`` is unpacked but not used here.
    """
    oRs, drugPairList, dDrug2Id, dId2Se, dList = datas
    for oR in oRs:
        pId, rExposeIds, rNonExposeIds = oR
        dPair = drugPairList[pId]
        rExpose = getSubList(dList, rExposeIds)

        # Side effects seen in at least one exposed record.
        seSet = set()
        for _, _, ses in rExpose:
            seSet.update(ses)
        nSe = len(seSet)
        if nSe == 0:
            continue

        # Sample size per round: 10% of each group, never less than one.
        n1 = max(int(len(rExposeIds) / 10), 1)
        n2 = max(int(len(rNonExposeIds) / 10), 1)

        # Dense column index for every side effect, plus its inverse.
        dOldSeId2NewId = {se: newId for newId, se in enumerate(seSet)}
        dId2NewSeIdOld = utils.reverse_dict(dOldSeId2NewId)

        ratioExpose = _calRatio(dList, rExposeIds, nSe, n1, dOldSeId2NewId)
        ratioNonExpose = _calRatio(dList, rNonExposeIds, nSe, n2,
                                   dOldSeId2NewId)

        sigSes = []
        for i in range(nSe):
            _, p = ttest_ind(ratioExpose[:, i],
                             ratioNonExpose[:, i],
                             alternative="greater")
            if p <= P_THRESHOLD:
                sigSes.append([dId2NewSeIdOld[i], p])

        for se, p in sigSes:
            queue.put([dPair, dId2Se[se], p])
Ejemplo n.º 5
0
def db(polySE, nSamples=2):
    """Print the non-zero input/output entries of the first test samples.

    Calls ``polySE.resetOnePassIndx()``, fetches a test batch with
    ``polySE.getNextMinibatchTest(-1)`` and, between a header and a footer
    line, prints for each of the first ``nSamples`` rows: the row index, the
    comma-joined names mapped from the non-zero input positions (via the
    reverse of ``polySE.dDrug``), and the comma-joined names mapped from the
    non-zero output positions (via the reverse of ``polySE.dSe``).

    Args:
        polySE: data object providing ``resetOnePassIndx``,
            ``getNextMinibatchTest``, ``dDrug`` and ``dSe``.
        nSamples: maximum number of rows to print. Defaults to 2, the value
            that used to be hard-coded. Fewer rows are printed when the batch
            is smaller, instead of raising ``IndexError``.

    NOTE(review): ``-1`` presumably requests the whole test split; confirm
    against ``getNextMinibatchTest``.
    """
    print("___________DB_______________")
    from utils import utils
    polySE.resetOnePassIndx()
    matInp, matOut, _ = polySE.getNextMinibatchTest(-1)

    # The id -> name maps do not depend on the row, so build them once.
    dId2Drug = utils.reverse_dict(polySE.dDrug)
    dId2Se = utils.reverse_dict(polySE.dSe)

    nRows = min(nSamples, len(matInp), len(matOut))
    for ii in range(nRows):
        print(ii)
        # Positions of the non-zero entries along the first axis of each row.
        nzd = np.nonzero(matInp[ii])[0]
        nzs = np.nonzero(matOut[ii])[0]

        drugNames = [dId2Drug[i] for i in nzd]
        seNames = [dId2Se[i] for i in nzs]
        print(",".join(drugNames))
        print(",".join(seNames))
    print("__________________________")