Code Example #1
def DP_model(X, n_politicians, n_parties, data_desc, a_beta, b_beta, theta,
             gibbs_sweeps, burn_in):
    start_time = time.time()
    if theta == 0:
        # Estimate theta using Newton's method
        theta = DP.crp_parameters(n_politicians, n_parties, 10000)
    print("Got CRP parameters\n")
    model = "DP-Beta-Bernoulli"
    params = {'a': a_beta, 'b': b_beta, 'theta': theta, 'T': gibbs_sweeps}

    # Compute Z matrices
    Z = DP.beta_bernoulli_irm(X=X, **params)
    print("computed Z matrices\n")

    # Compute co-clustering matrix
    co_clust_matrix, avg_n_clusters = co_clustering_matrix(
        Z, burn_in_factor=burn_in)
    print("computed co-clutering matrix\n")
    print(f"average number of clusters {avg_n_clusters}\n")

    # Get point estimate
    labels = point_estimate(co_clust_matrix, avg_n_clusters)

    final_time = time.time()
    diff_time = final_time - start_time
    build_output(data_desc, model, params, co_clust_matrix, avg_n_clusters,
                 labels, diff_time)
Code Example #2
def loaddata():

    print('loading the data.....')
    # Build the label vocabulary
    lab_v = []
    lab_v.append('negative')
    lab_v.append('positive')

    # Process the training data (builds the data vocabulary, etc.)
    t_data_list_node, data_v = dp.datadeal('data/raw.clean.train',
                                           is_traindata=True)
    # Process the dev data
    d_data_list_node = dp.datadeal('data/raw.clean.dev', is_traindata=False)
    # Process the test data
    test_data_node = dp.datadeal('data/raw.clean.test', is_traindata=False)

    if args.out_word_v:
        args.word_embed = out_word_vec.add_word_v(data_v)

    args.embed_num = len(data_v)
    args.class_num = len(lab_v)

    print("\nParameters:")
    for attr, value in sorted(args.__dict__.items()):
        if attr == 'word_embed':
            continue
        print("\t{}={}".format(attr.upper(), value))

    return t_data_list_node, data_v, d_data_list_node, lab_v
Code Example #3
def randomReconWrapper(dirName, D, T, L, numSamples, typeGen):
    """Takes in a directory of newick files, dirName, duplication, loss and 
    transfer costs, the number of desired random reconciliations, and the type
    of generator (biased or uniform), and calls those random generators to
    build a file containing the number of temporal inconsistencies found in 
    those randomly generated reconciliations as well as other information 
    relating to the file"""
    totalTimeTravel = 0 # To record total number of time travels in directory
    outOf = 0 # To record total number of reconciliations made
    # loop through files in directory
    for fileName in os.listdir(dirName):
        if fileName.endswith('.newick'):
            f = open(fileName[:-7]+'.txt', 'w')
            f.write(typeGen+" random reconciliations"+"\n")
            hostTree, parasiteTree, phi = newickFormatReader.getInput\
                (dirName+"/"+fileName)
            # find size of parasite and host trees
            parasiteSize = len(parasiteTree)+1
            hostSize = len(hostTree)+1
            DTLReconGraph, numRecon = DP.DP(hostTree, parasiteTree, phi, D, T, L)
            rootList = rootGenerator(DTLReconGraph, parasiteTree)
            randomReconList = []
            timeTravelCount = 0  # inconsistencies found for this file
            for n in range(numSamples):
                startRoot = random.choice(rootList)
                if typeGen == "uniform":
                    currentRecon = uniformRecon(DTLReconGraph, [startRoot], {})
                else: 
                    normalizeDTL = normalizer(DTLReconGraph)
                    currentRecon = biasedRecon(normalizeDTL, [startRoot], {})
                for key in currentRecon.keys():
                    currentRecon[key] = currentRecon[key][:-1]
                randomReconList.append(currentRecon)
            # make sure there are no duplicate reconciliations
            uniqueReconList = []
            for recon in randomReconList:
                if not recon in uniqueReconList:
                    uniqueReconList.append(recon)
            outOf += len(uniqueReconList)
            for recon in uniqueReconList:
                graph = reconciliationGraph.buildReconstruction\
                    (hostTree, parasiteTree, recon)
                currentOrder = orderGraph.date(graph)
                numTrans = findTransfers(recon)
                if currentOrder == 'timeTravel':
                    f.write("Temporal Inconsistency, reconciliation has "+str(numTrans)+" transfers"+"\n")
                    timeTravelCount += 1
                    totalTimeTravel += 1
                else: 
                    f.write("No temporal inconsistencies, reconciliation has "+str(numTrans)+" transfers"+"\n")
            f.write(fileName+" contains "+str(timeTravelCount)+" temporal "+ \
                "inconsistencies out of "+ str(len(uniqueReconList))+ \
                " reconciliations."+"\n"+"Total number of reconciliations: "+\
                str(numRecon)+"\n"+"Host tree size: "+str(hostSize)+"\n"+\
                "Parasite tree size: "+str(parasiteSize)+ "\n")
            f.close()
    print "Total fraction of temporal inconsistencies in directory: ", \
            totalTimeTravel, '/', outOf
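A hedged call sketch, assuming a directory of .newick files named newickFiles and reusing the D=2., T=3., L=1. costs that appear in Code Example #10; the sample count and generator type are illustrative:

randomReconWrapper("newickFiles", D=2., T=3., L=1., numSamples=100,
                   typeGen="uniform")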
Code Example #4
def getCostscapeDTLs(DTLPairs, hostTree, parasiteTree, phi):
    """This function takes as input DTLPairs, a list of tuples with T and L 
	costs, and the hostTree, parasiteTree, and phi. It returns a list of DTLs 
	whose scores are computed with the T and L values from each element in 
	DTLPairs."""

    DTLList = []
    for i in DTLPairs:
        newDTL = DP(hostTree, parasiteTree, phi, 1, i[0], i[1])[0]
        DTLList.append(newDTL)
    return DTLList
Code Example #5
File: Clusters.py Project: billdthompson/NorthWind
def plotTrain(dataset, catMus, catVariances,durationRange = np.linspace(-dimensionRange,dimensionRange,1000),colour="#0000FF",xBottom=0,alpha=0.5):
	maxHeight = 0.
	#ff.niceGraph()
	for c in range(len(catMus)):
		mu = catMus[c]
		sd = np.sqrt(catVariances[c])
		density = normList(NORM.pdf(durationRange,loc=mu,scale=sd))
		maxDensity = max(density)
		if maxDensity > maxHeight:
			maxHeight = DP.dc(maxDensity)
		pl.fill_between(durationRange,xBottom, density+xBottom,alpha=alpha,color=".35")#color="#FF1493"
		pl.text(durationRange[np.where(density==max(density))[0]-2], maxDensity+xBottom+.001, r'$\mu_%d$' %c,fontsize=10)
Code Example #6
File: Clusters.py Project: billdthompson/NorthWind
def sampleWang(X,cp,parameters=modelParameters):
	Z = list(np.zeros(len(X)))
	sampler = DP.sampleDPMM(X,Z,cp,
		                 DP.gaussianMarginalLikelihood,
		                 DP.gaussianMAPPostPred,
		                 parameters,
		                 iterations=2000,
		                 burn=1000,
		                 thin=5,
		                 cpSampler=DP.sampleCP)
	posteriorSamples = sampler.wangSUGS()
	return posteriorSamples
Code Example #7
File: Clusters.py Project: billdthompson/NorthWind
def sampleClusters(X,cp):
	Z = list(np.zeros(len(X)))
	sampler = DP.sampleDPMM(X,Z,cp,
		                 DP.gaussianMarginalLikelihood,
		                 DP.gaussianPostPred,
		                 modelParameters,
		                 iterations=2000,
		                 burn=1000,
		                 thin=5,
		                 cpSampler=DP.sampleCP)
	posteriorSamples = sampler.gibbs()
	return posteriorSamples
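A usage sketch for sampleClusters (sampleWang in Code Example #6 is called the same way), assuming numpy, DP, and the module-level modelParameters dict are available; the one-dimensional data and the concentration parameter cp are illustrative:

import numpy as np

data = list(np.random.normal(0.0, 1.0, 200))  # hypothetical 1-D observations
posteriorSamples = sampleClusters(data, cp=10.)  # cp=10. mirrors the default in Code Example #21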
Code Example #8
def newScoreWrapper(newickFile, switchLo, switchHi, lossLo, lossHi, D, T, L):
    """This function takes as input hostTree, parasiteTree, phi, duplication 
	cost D, transfer cost T, and loss cost L, and returns the newDTL whose 
	scores were calculated from costscape."""

    H, P, phi = newickFormatReader.getInput(newickFile)
    originalDTL, numRecon, leaves = DP(H, P, phi, D, T, L)
    pointList = findCenters(newickFile, switchLo, switchHi, lossLo, lossHi)
    DTLPairs = getDTLVals(pointList)
    DTLList = getCostscapeDTLs(DTLPairs, H, P, phi)
    newDTL = changeDTLScores(originalDTL, DTLList)
    return newDTL, numRecon, leaves
Code Example #9
def main():
    # read the run option from the first command-line argument
    option = sys.argv[1]

    #Data Preprocessing
    DP.train()
    DP.dev()
    DP.test()

    if option == '0':
        # Make top-2000 CTF vectors
        CTF.train()
        CTF.dev()
        CTF.test()

        #train result
        test_RMLR.CTF_final()
    elif option == '1':
        # Make top-2000 DF vectors
        DF.train()
        DF.dev()
        DF.test()

        # train result
        test_RMLR.DF_final()
Code Example #10
def run_test(fileName, max_k):
    cache_dir = './cache'
    D = 2.
    T = 3.
    L = 1.

    host, paras, phi = newickFormatReader.getInput(fileName)

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
        f = open('%s/README' % cache_dir, 'w')
        f.write(
            'This directory holds a cache of reconciliation graphs for the TreeLife data set'
        )
        f.close()

    cache_location = '%s/%s.graph' % (cache_dir, os.path.split(fileName)[1])
    if not os.path.isfile(cache_location):
        print >> sys.stderr, 'A reconciliation graph has not been built yet for this newick file'
        print >> sys.stderr, 'Doing so now and caching it in {%s}...' % cache_location

        DictGraph, numRecon = DP.DP(host, paras, phi, D, T, L)

        f = open(cache_location, 'w+')
        f.write(repr(DictGraph))
        f.close()

    print >> sys.stderr, 'Loading reconciliation graph from cache'
    f = open(cache_location)
    DictGraph = eval(f.read())
    f.close()

    scoresList, dictReps = Greedy.Greedy(DictGraph, paras)

    print >> sys.stderr, 'Found cluster representatives using point-collecting'

    graph = ReconGraph.ReconGraph(DictGraph)
    setReps = [
        ReconGraph.dictRecToSetRec(graph, dictRep) for dictRep in dictReps
    ]
    random.seed(0)
    extra_reps = [KMeans.get_template(graph) for i in xrange(max_k)]

    representatives = setReps + extra_reps

    print >> sys.stderr, 'Starting K Means algorithm ... '
    print >> sys.stderr, 'Printing Average and Maximum cluster radius at each step'

    for i in xrange(1, max_k + 1):
        print 'k = %d' % i
        KMeans.k_means(graph, 10, i, 0, representatives[:i])
Code Example #11
def loaddata(args):
    nd = node.Node()
    dp = DP.DataPrecess()
    train_data_voc, train_data_sentence_num = dp.buildvocab(
        path="raw.clean.train")  # type:list
    label_voc = dp.buildvocab_label(path="classfication")

    train_data, train_lable = dp.readdata_d(path="raw.clean.train",
                                            shuffle=args.shuffle)
    dev_data, dev_lable = dp.readdata_d(path="raw.clean.dev",
                                        shuffle=args.shuffle)
    unusr, dev_sentence_num = dp.readdata_v(path="raw.clean.dev")
    print('train_sentence:', train_data_sentence_num, 'dev_sentence:',
          dev_sentence_num)
    nd.set_dev_data(dev_data)
    nd.set_dev_lable(dev_lable)
    nd.set_dev_sentence_num(dev_sentence_num)

    nd.set_train_data(train_data)
    nd.set_train_data_sentence_num(train_data_sentence_num)
    nd.set_train_lable(train_lable)
    nd.set_train_data_voc(train_data_voc)

    nd.set_label_voc(label_voc)

    if args.out_word_v is True:
        args.word_embed = add_word_v(train_data_voc)

    for i in range(len(args.word_embed)):
        if len(args.word_embed[i]) != 300:
            print('less 300')

    args.embed_num = len(train_data_voc)
    args.class_num = len(label_voc)

    args.cuda = (not args.no_cuda) and torch.cuda.is_available()
    del args.no_cuda
    args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]
    args.save_dir = os.path.join(
        args.save_dir,
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))

    print("\nParameters:")
    for attr, value in sorted(args.__dict__.items()):
        if attr == 'word_embed':
            continue
        print("\t{}={}".format(attr.upper(), value))

    return nd
Code Example #12
def getCostscapeDTLReconGraphs(DTLReconGraphPairs, hostTree, parasiteTree, \
  phi):
    """This function takes as input DTLReconGraphPairs, a list of tuples with 
	transfer and loss costs, and the hostTree, parasiteTree, and phi. It 
	returns a list of DTLReconGraphs whose scores are computed with the 
	transfer and loss values from each element in DTLReconGraphPairs."""

    DTLReconGraphList = []
    for cost in DTLReconGraphPairs:
        #assign those associated costs to the newDTLReconGraph
        newDTLReconGraph = DP(hostTree, parasiteTree, phi, 1, cost[0], \
         cost[1])[0]
        DTLReconGraphList.append(newDTLReconGraph)
    return DTLReconGraphList
Code Example #13
File: Clusters.py Project: billdthompson/NorthWind
def makeInferenceForPlotting(N,cp,D):
	"grab MAP inferences under WANG"
	inference = sampleWang(D,cp)
	nCats = len(set(inference[1]))
	MAPMus, MAPSDs = [],[]
	for i in range(nCats):
		datasetCat = []
		for j in range(len(inference[0])):
			if inference[1][j] == i:
				datasetCat.append(inference[0][j])
		muCat, sigmaCat = DP.getMAP(datasetCat,paramsDict= modelParameters)
		MAPMus.append(muCat)
		MAPSDs.append(sigmaCat)
	return D, MAPMus, MAPSDs
Code Example #14
File: main.py Project: silongzhang/RLInventory
def main(argv):
    try:
        if argv[1] == 'DP':
            value = DP.DPMethod(argv[2])
        elif argv[1] == 'DQN':
            # InFile, batch_size, buffer_size, episodes_train, episodes_test, startTime
            value = DQN.DQNMethod(argv[2], int(argv[3]), int(argv[4]),
                                  int(argv[5]), int(argv[6]),
                                  time.process_time())
        else:
            raise Exception
        print('value = {}'.format(value))
    except:
        utils.printErrorAndExit('main')
Code Example #15
def unitScoreDTL(hostTree, parasiteTree, phi, D, T, L):
	""" Takes a hostTree, parasiteTree, tip mapping function phi, and 
	duplication cost (D), transfer cost (T), and loss cost (L) and returns the
	DTL graph in the form of a dictionary, with event scores set to 1. 
	Cospeciation is assumed to cost 0. """
	DTLReconGraph, numRecon = DP.DP(hostTree, parasiteTree, phi, D, T, L)
	newDTL = {}
	for vertex in DTLReconGraph:
		newDTL[vertex] = []
		for event in DTLReconGraph[vertex][:-1]:
			newEvent = event[:-1] + [1.0]
			newDTL[vertex].append(newEvent)
		newDTL[vertex].append(DTLReconGraph[vertex][-1])
	return newDTL
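A brief sketch of a possible call, assuming newickFormatReader is importable and example.newick is a hypothetical input file; the D, T, L costs are illustrative:

host, paras, phi = newickFormatReader.getInput("example.newick")
newDTL = unitScoreDTL(host, paras, phi, D=2., T=3., L=1.)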
Code Example #16
def gethourdata(name):
	cursor = readarchiveddata(name,'hour').limit(10)
	p = {}
	index = 0
	n0 = {}
	for i in cursor:
		n = str(time.localtime(i['hourtime'] * 3600).tm_hour)
		if n not in n0: 
			n0[n] = index
			index = index + 1
		temp = n0[n]
		for j in config.INFOS:
			if j not in p:
				p[j] = {}
			p[j][n] = [temp * 11,DP.percentify(name,j,i[j])]
	return p
Code Example #17
def DQNMethod(InFile, batch_size, buffer_size, episodes_train, episodes_test,
              startTime):
    parameterDP, pdf, rf, demand = DP.readData(InFile)
    global T, N
    T = parameterDP.T
    N = parameterDP.N

    env = Environment(parameterDP, pdf, rf, demand)
    replayBuffer = ReplayBuffer(buffer_size)

    print('Training ...')
    train(env, nets, replayBuffer, batch_size, episodes_train, episodes_test,
          startTime)

    print('Testing ...')
    return test(env, nets, episodes_test, 0)
Code Example #18
def getdaydata(name):
	cursor = readarchiveddata(name,'day').limit(10)
	p = {}
	index = 0
	n0 = {}
	for i in cursor:
		a = time.localtime(i['daytime'] * 3600 * 24)
		n = str((a.tm_mon,a.tm_mday))[1:-1]
		if n not in n0: 
			n0[n] = index
			index = index + 1
		temp = n0[n]
		for j in config.INFOS:
			if j not in p:
				p[j] = {}
			# use the first-seen index for this day, as in gethourdata
			p[j][n] = [temp * 11, DP.percentify(name, j, i[j])]
	return p
Code Example #19
def Reconcile(argList):
    """Takes command-line arguments of a .newick file, duplication, transfer, 
	and loss costs, the type of scoring desired and possible switch and loss 
	ranges. Creates Files for the host, parasite, and reconciliations"""
    fileName = argList[1]  #.newick file
    D = float(argList[2])  # Duplication cost
    T = float(argList[3])  # Transfer cost
    L = float(argList[4])  # Loss cost
    freqType = argList[5]  # Frequency type
    # Optional inputs if freqType == xscape
    switchLo = float(argList[6])  # Switch lower boundary
    switchHi = float(argList[7])  # Switch upper boundary
    lossLo = float(argList[8])  # Loss lower boundary
    lossHi = float(argList[9])  # Loss upper boundary

    host, paras, phi = newickFormatReader.getInput(fileName)
    hostRoot = cycleCheckingGraph.findRoot(host)
    hostv = cycleCheckingGraph.treeFormat(host)
    Order = orderGraph.date(hostv)
    # Default scoring function (if freqType == "Frequency" scoring)
    DTLReconGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
    print DTLReconGraph, numRecon
    #uses xScape scoring function
    if freqType == "xscape":
        DTLReconGraph = calcCostscapeScore.newScoreWrapper(fileName, switchLo, \
         switchHi, lossLo, lossHi, D, T, L)
    #uses Unit scoring function
    elif freqType == "unit":
        DTLReconGraph = unitScoreDTL(host, paras, phi, D, T, L)

    DTLGraph = copy.deepcopy(DTLReconGraph)
    scoresList, rec = Greedy.Greedy(DTLGraph, paras)
    for n in range(len(rec)):
        graph = cycleCheckingGraph.buildReconciliation(host, paras, rec[n])
        currentOrder = orderGraph.date(graph)
        if currentOrder == "timeTravel":
            rec[n], currentOrder = detectCycles.detectCyclesWrapper(
                host, paras, rec[n])
            currentOrder = orderGraph.date(currentOrder)
        hostOrder = hOrder(hostv, currentOrder)
        hostBranchs = branch(hostv, hostOrder)
        if n == 0:
            newickToVis.convert(fileName, hostBranchs, n, 1)
        else:
            newickToVis.convert(fileName, hostBranchs, n, 0)
        # filename[:-7] is the file name minus the .newick
        reconConversion.convert(rec[n], DTLReconGraph, paras, fileName[:-7], n)
Code Example #20
def freqSummation(argList):
    """Takes as input an argument list containing a newick file of host and 
	parasite trees as well as their phi mapping, duplication, transfer, and 
	loss costs, the type of frequency scoring to be used, as well as switch 
	and loss cost ranges for xscape scoring, and returns a file containing the
	list of scores for each individual reconciliation, the sum of those 
	scores, the total cost of those reconciliations and the number of 
	reconciliations of those trees."""
    newickFile = argList[1]
    D = float(argList[2])
    T = float(argList[3])
    L = float(argList[4])
    freqType = argList[5]
    switchLo = float(argList[6])
    switchHi = float(argList[7])
    lossLo = float(argList[8])
    lossHi = float(argList[9])
    fileName = newickFile[:-7]
    f = open(fileName + "freqFile.txt", 'w')
    host, paras, phi = newickFormatReader.getInput(newickFile)
    DTL, numRecon = DP.DP(host, paras, phi, D, T, L)
    if freqType == "Frequency":
        newDTL = DTL
    elif freqType == "xscape":
        newDTL = calcCostscapeScore.newScoreWrapper(newickFile, switchLo,
                                                    switchHi, lossLo, lossHi,
                                                    D, T, L)
    elif freqType == "unit":
        newDTL = MasterReconciliation.unitScoreDTL(host, paras, phi, D, T, L)
    scoresList, reconciliation = Greedy.Greedy(newDTL, paras)
    totalSum = 0
    for score in scoresList:
        totalSum += score
    for index in reconciliation:
        totalCost = 0
        for key in index:
            if index[key][0] == "L":
                totalCost += L
            elif index[key][0] == "T":
                totalCost += T
            elif index[key][0] == "D":
                totalCost += D
    f.write(str(scoresList) + '\n')
    f.write(str(totalSum) + '\n')
    f.write(str(totalCost) + '\n')
    f.write(str(numRecon))
    f.close()
Code Example #21
File: Clusters.py Project: billdthompson/NorthWind
def IL(startMuA, startMuB, startSDA, startSDB, gens, N,cp=10.,parameters=modelParameters):
	D = sampleData(mus=[startMuA,startMuB],variances = [startSDA,startSDB],N=N)
	nCatData, musData, SDsData = [],[],[]
	for g in range(gens):
		inference = sampleWang(D,cp,parameters)
		#print "Inference:", inference
		nCats = len(set(inference[1]))
		nCatData.append(nCats)
		thisGenMus, thisGenSDs = [],[]
		for i in range(nCats):
			datasetCat = []
			for j in range(len(inference[0])):
				if inference[1][j] == i:
					datasetCat.append(inference[0][j])
			muCat, sigmaCat = DP.getMAP(datasetCat,parameters)
			thisGenMus.append(muCat)
			thisGenSDs.append(sigmaCat)
		musData.append(thisGenMus)
		SDsData.append(thisGenSDs)
		D = sampleData(mus=thisGenMus,variances = thisGenSDs,N=N)
	return nCatData,musData,SDsData
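An illustrative iterated-learning run, assuming the module-level modelParameters and the sampleData/sampleWang helpers are in scope; every numeric argument here is hypothetical:

nCatData, musData, SDsData = IL(startMuA=-2., startMuB=2., startSDA=1.,
                                startSDB=1., gens=10, N=100)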
Code Example #22
def Reconcile(argList):
	"""Takes command-line arguments of a .newick file, duplication, transfer, 
	and loss costs, the type of scoring desired and possible switch and loss 
	ranges. Creates Files for the host, parasite, and reconciliations"""
	fileName = argList[1] #.newick file
	D = float(argList[2]) # Duplication cost
	T = float(argList[3]) # Transfer cost
	L = float(argList[4]) # Loss cost
	freqType = argList[5] # Frequency type
	# Optional inputs if freqType == xscape
	switchLo = float(argList[6]) # Switch lower boundary
	switchHi = float(argList[7]) # Switch upper boundary
	lossLo = float(argList[8]) # Loss lower boundary
	lossHi = float(argList[9]) # Loss upper boundary

	host, paras, phi = newickFormatReader.getInput(fileName)
	hostRoot = ReconciliationGraph.findRoot(host)
	# Default scoring function (if freqType == "Frequency" scoring)
	DTLReconGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
	#uses xScape scoring function
	# if freqType == "xscape":
	# 	DTLReconGraph = calcCostscapeScore.newScoreWrapper(fileName, switchLo, \
	# 		switchHi, lossLo, lossHi, D, T, L)
	#uses Unit scoring function
	if freqType == "unit":
		DTLReconGraph = unitScoreDTL(host, paras, phi, D, T, L)

	DTLGraph = copy.deepcopy(DTLReconGraph)
	scoresList, recs = Greedy.Greedy(DTLGraph, paras)

	infeasible_recs = []
	for rec in recs:
		if orderGraph.date(ReconciliationGraph.buildReconciliation(host, paras, rec)) == False:
			infeasible_recs.append(rec)

	return infeasible_recs, recs
Code Example #23
def setofaction(self, t, current_state):
    return DP.setofaction(t, self.parameterDP, current_state)
Code Example #24
File: main.py Project: bobcheng15/RL_DP
import DP
import numpy as np
import sys

if __name__ == "__main__":

    map = np.loadtxt("../testcase/case" + sys.argv[1] + ".txt")
    terminal = np.loadtxt("../testcase/terminal" + sys.argv[1] + ".txt",
                          dtype='i')
    size_row, size_col = map.shape
    it = iter(terminal)
    terminal = list(zip(it, it))
    dp = DP.DP(size_row, size_col, map, terminal, -1)
    if (sys.argv[2] == "pi"):
        count = dp.policyIteration()
        print("Stop after " + str(count) + " iteration of policy iteration")

    else:
        count = dp.valueIteration()
        print("Stop after " + str(count) + " iteration of value iteration")
    dp.plot(sys.argv[1], sys.argv[2])
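For instance, python main.py 1 pi would load ../testcase/case1.txt and ../testcase/terminal1.txt and solve them with policy iteration; any second argument other than "pi" selects value iteration instead.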
Code Example #25
File: dp_draft.py Project: rchurch4/Projects
def HVC(X, p):
	n = len(X)
	l = n/p
	d = len(X[0])

	clusters = []
	for i in range(1, d):
		s = sorted(X, key=lambda x: x[i])
		c = dp.cluster1D(s, len(s), i, p)
		clusters.append(c)

	#build relationship matrix
	#empty matrix [n, n]
	F = np.zeros((n, n))

	#for each 1-d clustering
	for y in clusters:
		idx = 0
		#take each cluster, with factor higher for earlier clusters
		while idx < len(y):
			c1 = y[idx]
			factor = len(y) - math.pow(6, idx)
			if factor < 0:
				factor = 0
			#and for each pair in the cluster
			i1 = 0
			while i1 < len(c1)-1:
				i2 = i1+1
				while i2 < len(c1):
					#add the factor of the cluster to their partnership F[x, q]
					F[c1[i1][0], c1[i2][0]] += factor/(len(clusters)*p)
					F[c1[i2][0], c1[i1][0]] += factor/(len(clusters)*p)
					i2+=1
				i1+=1
			idx+=1

	print F
	X = sorted(X, key=lambda student: student[0])

	clusters = list()
	while len(clusters) < l:
		clusters.append(list())

	first_round = True
	while len(clusters[l-1]) < p:
		#print "next round"
		for i in clusters:
			if first_round:
				while len(i) < 2:
					next_idx = rand.randint(0,n-1)
					if F[next_idx, 0] != -1:
						i.append(X[next_idx])
						F[next_idx, :] = -1
			else:
				maxIdx = 0
				maxVal = 0
				index = 0

				while index < min(len(i), 2):
					test = i[index][0]
					possIdx = 0
					for j in range(0, len(F[0])):
						# if possIdx[1] == test:
						# 	#F[possIdx[1], test] = -1
						# 	possIdx = np.unravel_index(np.argmax(F[:, test]), (n,n))
						possMax = F[j, test]
						if possMax >= maxVal:
							maxIdx = j
							maxVal = possMax
					index += 1
				i.append(X[maxIdx])
				F[maxIdx, :] = -1
		first_round = False

	return clusters
Code Example #26
File: example_4_2.py Project: tungkw/rl_intro_book
    def get_actions(self, state):
        num1 = state // 21
        num2 = state % 21
        actions = []
        for a in range(self.action_size):
            moved = a - 5
            if num1 + moved < 0 or num2 - moved < 0: continue
            actions.append(a)
        return actions

    def print_evaluation(self):
        print("value matrix")
        for i in range(21):
            for j in range(21):
                state_idx = (i * 21 + j)
                print("{:.3f}".format(self.v[state_idx]), end=' ')
            print()

    def print_improvement(self):
        print("policy matrix")
        for i in range(21):
            for j in range(21):
                state_idx = (i * 21 + j)
                print(self.p[state_idx], end=' ')
            print()


if __name__ == "__main__":
    agent = Agent(21 * 21, 11, 0.9)
    method = DP.algo(agent, 0.0001)
    method.policy_iteration()  # or: method.policy_iteration(show=True)
Code Example #27
File: example_4_1.py Project: tungkw/rl_intro_book
    def reward(self, state, action):
        if state == 0:
            return 0.0
        else:
            return -1.0

    def get_actions(self, state):
        return [i for i in range(self.action_size)]

    def print_evaluation(self):
        print("value matrix")
        for i in range(4):
            for j in range(4):
                state_idx = (i*4+j) % 15
                print("{:.3f}".format(self.v[state_idx]), end=' ')
            print()
    
    def print_improvement(self):
        print("policy matrix")
        for i in range(4):
            for j in range(4):
                state_idx = (i*4+j) % 15
                print(self.p[state_idx], end=' ')
            print()

if __name__ == "__main__":
    test = myAgent(15, 4, 1.0)
    method = DP.algo(test, threshold=0.0001)
    method.policy_evaluation(show=True)
    # method.policy_iteration(show=True)
    # method.value_iteration(show=True)
Code Example #28
		attributes = list()
		attributes.append(nextId)
		while len(attributes) < d+1:
			meth = int(rand.normalvariate(normMean, normStdev))  # random draw from a normal distribution
			attributes.append(meth)

		nextStudent = tuple(attributes)
		dataset.append(nextStudent)
		nextId += 1

	#sort the dataset by intelligence
	randomCopy = cp.deepcopy(dataset)
	dataset = sorted(dataset, key=lambda student: student[1])

	originalCopy = cp.deepcopy(dataset)
	l1 = DP.cluster1D(dataset, size, 1, p)

	clusters = list()
	clusters.append(l1)

	dim = 2
	while dim <= d:
		dataset = sorted(dataset, key=lambda student: student[dim])
		l2 = DP.cluster1D(dataset, size, dim, p)
		clusters.append(l2)
		dim += 1


	#####################################
	#		   Cluster Ranking 			#
	#									#
Code Example #29
File: MainForNewCluster.py Project: KingSpencer/VaDE
        # to DP
        # DPParam = DP_fit(z_batch)
        # DPParam = np.ones((batch_size))
        # gamma: 'LPMtx' (batch_size, # of cluster)
        # N : 'Nvec' (# of cluster, )
        # m : 'm' (# of cluster, latent_dim)
        # W : 'B' (# of cluster, latent_dim, latent_dim)
        # v: 'nu' (# of cluster)

        # DPParam = DPObj.fit(z_batch)
        if dataset == 'firstBatch' or dataset == 'secondBatch':
            newinitname = results.initModelPath
            DPObj = DP.DP(output_path=fullOutputPath,
                          initname=newinitname,
                          gamma1=gamma1,
                          gamma0=gamma0,
                          sf=sf,
                          nBatch=nBatch,
                          taskID=taskID)
            DPParam, newinitname = DPObj.fitWithWarmStart(z_batch, newinitname)
        else:
            if epoch == 0 and iteration == 0:
                newinitname = 'randexamples'
                if dataset == 'reuters10k':
                    DPObj = DP.DP(output_path=fullOutputPath,
                                  initname=newinitname,
                                  gamma1=gamma1,
                                  gamma0=gamma0,
                                  Kmax=Kmax,
                                  sf=sf,
                                  nBatch=nBatch,
Code Example #30
File: VAEDP.py Project: KingSpencer/VaDE
## make full output path
fullOutputPath = createOutputFolderName(outputPath, Kmax, dataset, epoch,
                                        batch_iter, scale, batchsize, rep, sf)
## name log file and write console output to log.txt
logFileName = os.path.join(fullOutputPath, 'log.txt')

if results.logFile:
    sys.stdout = open(logFileName, 'w')

MNIST_df = XData(aa['z'], dtype='auto')
##########################################################
## create a DP object and get DPParam
DPObj = DP.DP(output_path=fullOutputPath,
              initname='randexamples',
              gamma1=gamma1,
              gamma0=gamma0,
              Kmax=Kmax,
              sf=sf,
              nLap=nLap,
              taskID=taskID)
DPParam, newinitname = DPObj.fit(aa['z'])
## after training model, get DPParam
#########################################################
## add evaluation summary metric and save results
#########################################################
## get z_fit from the encoder and fit with DP model to get all the labels for all training data
z_fit = aa['z']
fittedY = obtainFittedYFromDP(DPParam, z_fit)


####################################
## Obtain the relationship between the fitted class label and the true label, stored in a dictionary