Esempio n. 1
0
def get_history_lengths(events, histScores, simhist=None, usepreval=False):
    """Count the events in each history and rank the events within it.

    A (histories x events) matrix is filled with the value each event carries
    in every history it belongs to; cells for absent events keep a sentinel
    larger than the expected values. Each row is then converted to ranks and
    scaled by the number of events present in that row.

    Args:
        events: sequence of objects exposing ``histories`` plus ``orders``
            and ``prevals``; the value sequences are assumed to line up with
            ``histories`` -- TODO confirm against the event class.
        histScores: 2-D array. Its row count sizes the matrix and it is
            handed to ``ecycles.historyids_to_indices`` to turn history ids
            into row indices.
        simhist: accepted for backward compatibility only; it does not
            influence the result.
        usepreval: use ``prevals`` as the ranking value when true, otherwise
            ``orders``.

    Returns:
        Tuple ``(hlengths, cranks)``: an int array with the number of filled
        cells per history, and a float array (histories x events) holding
        ``rank / (length - 1)``. Absent events rank after present ones, so
        their scaled rank is >= 1 in rows with at least two events.
    """
    numhists = histScores.shape[0]
    # Sentinel for "event not in this history"; values >= mymax would be
    # miscounted as absent.
    mymax = len(events) + 500
    myranks = np.ones((numhists, len(events))) * mymax
    for j, e in enumerate(events):
        hindices = ecycles.historyids_to_indices(e.histories, histScores)
        values = e.prevals if usepreval else e.orders
        myranks[hindices, j] = np.array(values, dtype=float)

    # Change the orders into ranks.
    hlengths = np.sum(myranks < mymax, axis=1).astype('float')
    # Single-event rows would give a zero divisor below; 1.1 avoids that and
    # is truncated back to 1 by the int cast in the return value.
    hlengths[hlengths == 1] = 1.1
    # argsort of argsort yields the rank of every cell within its row.
    xranks = myranks.argsort(axis=1).argsort(axis=1)
    # NOTE(review): rows with no events divide by -1, giving ranks <= 0.
    cranks = xranks.astype('float') / (hlengths[:, None] - 1)
    return (hlengths.astype('int'), cranks)
def make_event_costs_array(event, historyScores, numruns, runlen):
    """Lay out an event's mean cost per history as a (runlen, numruns) grid.

    The flat cost vector has one slot per history (``runlen * numruns``).
    Slots for histories that do not contain the event stay at -1; the others
    receive the element-wise mean of ``event.uppercosts`` and
    ``event.lowercosts``. The vector is reshaped in column-major order.
    """
    history_ids = np.array(event.histories, dtype=int)
    slots = histseg.historyids_to_indices(history_ids, historyScores)
    flat_costs = -np.ones(runlen * numruns)
    cost_bounds = np.vstack((event.uppercosts, event.lowercosts))
    flat_costs[slots] = np.mean(cost_bounds, axis=0)
    grid_shape = (runlen, numruns)
    return flat_costs.reshape(grid_shape, order='F')
Esempio n. 3
0
def get_hcost_difference(eventA, eventB, simulation, historyScores, shufstart):
    """Summarize history costs by the relative order of two events.

    Only histories that contain both events are considered. They are split
    with ``split_hids_before_after_iteration(hids, shufstart)`` and, for each
    part, the histories are grouped by whether event A is ordered before
    event B or after it. For each group the count and the mean of columns
    1:3 of ``historyScores`` are recorded.

    Args:
        eventA, eventB: objects exposing ``histories`` (a list, since
            ``.index`` is used) and ``orders``.
        simulation: when true and history id 0 is shared, id 0 is removed
            from the comparison and used to set the truth flag instead.
        historyScores: 2-D array indexed through
            ``ecycles.historyids_to_indices``.
        shufstart: forwarded to ``split_hids_before_after_iteration``.

    Returns:
        Tab-separated string: the truth flag (-1 unknown, 1 A before B,
        2 B before A, 0 equal) followed by ``abcost, bacost, tot, abcnt,
        bacnt`` for the second part of the split and then for the first.
    """
    keys = ['abcost', 'bacost', 'tot', 'abcnt', 'bacnt']
    shuffledat = dict.fromkeys(keys, 0)
    noshuffdat = dict.fromkeys(keys, 0)
    truth = -1
    ahist = np.array(eventA.histories)
    bhist = np.array(eventB.histories)
    # np.isin replaces np.in1d, which is deprecated in recent NumPy.
    shared = np.isin(ahist, bhist)
    if shared.any():
        # History ids where both eventA and eventB exist together.
        hids = ahist[shared]
        if simulation and 0 in hids:
            hids = hids[hids != 0]
            atruth = eventA.orders[eventA.histories.index(0)]
            btruth = eventB.orders[eventB.histories.index(0)]
            if atruth < btruth:
                truth = 1
            elif btruth < atruth:
                truth = 2
            else:
                truth = 0
        aorders_all = np.array(eventA.orders)
        borders_all = np.array(eventB.orders)
        # Split hids into those before shuffling and after shuffling.
        (hids1, hids2) = split_hids_before_after_iteration(hids, shufstart)
        for (part, dat) in [(hids1, noshuffdat), (hids2, shuffledat)]:
            ai = np.isin(ahist, part)
            bi = np.isin(bhist, part)
            part_ids = ahist[ai]
            # NOTE(review): the two order vectors are compared position by
            # position, which assumes both events list their shared
            # histories in the same relative order -- confirm with callers.
            aorders = aorders_all[ai]
            borders = borders_all[bi]
            abi = part_ids[aorders < borders]
            dat['abcnt'] = len(abi)
            if len(abi) > 0:
                hi = ecycles.historyids_to_indices(abi, historyScores)
                dat['abcost'] = np.mean(historyScores[hi, 1:3])
            bai = part_ids[borders < aorders]
            dat['bacnt'] = len(bai)
            if len(bai) > 0:
                hi = ecycles.historyids_to_indices(bai, historyScores)
                dat['bacost'] = np.mean(historyScores[hi, 1:3])
            dat['tot'] = int(np.count_nonzero(ai))
    fields = [str(truth)]
    for dat in [shuffledat, noshuffdat]:
        fields.extend(str(dat[k]) for k in keys)
    return "\t".join(fields)
Esempio n. 4
0
def get_hcost_difference(eventA, eventB, simulation, historyScores, shufstart):
    """Compare history costs according to which of two events comes first.

    Histories containing both events are split by
    ``split_hids_before_after_iteration(hids, shufstart)``. Within each part
    the histories are grouped into "A before B" and "B before A"; the group
    size and the mean of ``historyScores[:, 1:3]`` over the group are kept.

    Args:
        eventA, eventB: objects with a ``histories`` list and an ``orders``
            sequence.
        simulation: if true and history id 0 is shared by both events, id 0
            determines the truth flag and is excluded from the comparison.
        historyScores: 2-D array addressed via
            ``ecycles.historyids_to_indices``.
        shufstart: passed through to ``split_hids_before_after_iteration``.

    Returns:
        A tab-separated string starting with the truth flag (-1 unknown,
        1 A first, 2 B first, 0 tie), followed by ``abcost, bacost, tot,
        abcnt, bacnt`` for the second split part and then the first.
    """
    keys = ['abcost', 'bacost', 'tot', 'abcnt', 'bacnt']
    shuffledat = dict.fromkeys(keys, 0)
    noshuffdat = dict.fromkeys(keys, 0)
    truth = -1
    ahist = np.array(eventA.histories)
    bhist = np.array(eventB.histories)
    # np.in1d is deprecated in recent NumPy; np.isin is its replacement.
    shared = np.isin(ahist, bhist)
    if shared.any():
        # hids holds the history ids where both events exist together.
        hids = ahist[shared]
        if simulation and 0 in hids:
            hids = hids[hids != 0]
            atruth = eventA.orders[eventA.histories.index(0)]
            btruth = eventB.orders[eventB.histories.index(0)]
            if atruth < btruth:
                truth = 1
            elif btruth < atruth:
                truth = 2
            else:
                truth = 0
        aorders_all = np.array(eventA.orders)
        borders_all = np.array(eventB.orders)
        # Split hids into those before shuffling and after shuffling.
        (hids1, hids2) = split_hids_before_after_iteration(hids, shufstart)
        for (part, dat) in [(hids1, noshuffdat), (hids2, shuffledat)]:
            ai = np.isin(ahist, part)
            bi = np.isin(bhist, part)
            part_ids = ahist[ai]
            # NOTE(review): orders are paired positionally; this presumes
            # the shared histories appear in the same relative order in both
            # events -- verify against the code that builds the events.
            aorders = aorders_all[ai]
            borders = borders_all[bi]
            abi = part_ids[aorders < borders]
            dat['abcnt'] = len(abi)
            if len(abi) > 0:
                hi = ecycles.historyids_to_indices(abi, historyScores)
                dat['abcost'] = np.mean(historyScores[hi, 1:3])
            bai = part_ids[borders < aorders]
            dat['bacnt'] = len(bai)
            if len(bai) > 0:
                hi = ecycles.historyids_to_indices(bai, historyScores)
                dat['bacost'] = np.mean(historyScores[hi, 1:3])
            dat['tot'] = int(np.count_nonzero(ai))
    fields = [str(truth)]
    for dat in [shuffledat, noshuffdat]:
        fields.extend(str(dat[k]) for k in keys)
    return "\t".join(fields)
Esempio n. 5
0
def count_earlylate_with_correction(events, historyScores, outfn1, outfn2):
    """Write per-event early/late counts and per-history lengths to files.

    Two (histories x events) matrices of event orders are built: one with
    every event, and one restricted to events whose ``histories`` include
    history id 0 (presumably the true-positive events of a simulation --
    TODO confirm). Rows are turned into ranks scaled by
    ``(row length - 1)``. An event counts as "early" in a history when its
    scaled rank is below 0.5 and "late" when it is above 0.5 and at most 1.

    Args:
        events: sequence of objects exposing ``histories``, ``orders``,
            ``id`` and ``determineEventType()``.
        historyScores: 2-D array; its row count sizes the matrices and it is
            passed to ``ecycles.historyids_to_indices``.
        outfn1: path of the per-event table (one line per event).
        outfn2: path of the per-history table with the number of events in
            each history for both matrices.

    Returns:
        None. The results are written to ``outfn1`` and ``outfn2``.
    """
    numhists = historyScores.shape[0]
    # Sentinel marking "event absent from this history".
    mymax = len(events) + 1
    myranks = np.ones((numhists, len(events))) * mymax
    myTPranks = np.ones((numhists, len(events))) * mymax
    simhist = 0
    for j, e in enumerate(events):
        hindices = ecycles.historyids_to_indices(e.histories, historyScores)
        orders = np.array(e.orders, dtype=float)
        myranks[hindices, j] = orders
        # The membership test does not depend on the individual history, so
        # it is evaluated once per event.
        if simhist in e.histories:
            myTPranks[hindices, j] = orders
    # Change the orders into ranks. The ranks will be different if we just
    # look at the TP events in a history vs if we include all of the events.
    hlengths = np.sum(myranks < mymax, axis=1).astype('float')
    trueonlylen = np.sum(myTPranks < mymax, axis=1).astype('float')
    np.savetxt(outfn2,
               np.vstack((hlengths, trueonlylen)).T,
               fmt="%d",
               delimiter='\t',
               header="length\tlen_onlytrue")
    # Ranks of the events including the whole history. argsort of argsort
    # gives the within-row rank of every cell.
    # NOTE(review): rows with exactly one event divide by zero here.
    keep = hlengths > 0
    cranks = myranks[keep, :].argsort().argsort().astype('float')
    cranks = cranks / (hlengths[keep][:, None] - 1)
    # Ranks for the histories including only the TP events.
    keep_tp = trueonlylen > 0
    cTPranks = myTPranks[keep_tp, :].argsort().argsort().astype('float')
    cTPranks = cTPranks / (trueonlylen[keep_tp][:, None] - 1)
    # Skip history 0 because that's the simulated history.
    # NOTE(review): this relies on the first retained row of both matrices
    # being the simulated history -- confirm with the index mapping.
    truth = cranks[0, :]
    cranks = cranks[1:, :]
    cTPranks = cTPranks[1:, :]
    earlycnts = np.sum(cranks < 0.5, axis=0)
    latecnts = np.sum(np.logical_and(cranks > 0.5, cranks <= 1), axis=0)
    tpearlycnts = np.sum(cTPranks < 0.5, axis=0)
    tplatecnts = np.sum(np.logical_and(cTPranks > 0.5, cTPranks <= 1), axis=0)
    totcnts = np.sum(cranks <= 1, axis=0)
    # The per-event table goes to outfn1.
    with open(outfn1, 'w') as outfh1:
        outfh1.write(
            "EventID\tEvent_type\tearly\tlate\tearlyTP\tlateTP\tTotal\tTruth\n")
        for j, e in enumerate(events):
            counts = (earlycnts[j], latecnts[j], tpearlycnts[j],
                      tplatecnts[j], totcnts[j], truth[j])
            outfh1.write("%s\t%s\t%s\n" % (e.id, e.determineEventType(),
                                           "\t".join(map(str, counts))))
Esempio n. 6
0
def make_event_costs_array(event, historyScores, numruns, runlen):
    """Return the event's mean cost per history as a (runlen, numruns) array.

    Histories are addressed through ``histseg.historyids_to_indices``; a
    history without the event keeps the placeholder -1. Each present history
    gets the mean of the matching ``uppercosts`` and ``lowercosts`` entries.
    The flat vector of ``runlen * numruns`` slots is reshaped column-major.
    """
    slots = histseg.historyids_to_indices(
        np.array(event.histories, dtype=int), historyScores)
    flat = -np.ones(runlen * numruns)
    bounds = np.vstack((event.uppercosts, event.lowercosts))
    flat[slots] = bounds.mean(axis=0)
    return flat.reshape((runlen, numruns), order='F')
def create_profile_matrices(edgelist, histScores):
    """Build the (histories x edges) profile matrices for a list of edges.

    Column ``k`` of the first matrix holds ``edgelist[k].cnval`` and column
    ``k`` of the second holds ``edgelist[k].prevals``, both in the rows of
    the histories that contain the edge; all other cells are 0.

    Returns:
        ``(hprofiles, pprofiles)`` after ``order_by_prevals`` and
        ``collapse_zeros`` have been applied.
    """
    shape = (histScores.shape[0], len(edgelist))
    cn_matrix = np.zeros(shape)
    prev_matrix = np.zeros(shape)
    for col, edge in enumerate(edgelist):
        rows = ecycles.historyids_to_indices(edge.histories, histScores)
        cn_matrix[rows, col] = edge.cnval
        prev_matrix[rows, col] = np.array(edge.prevals)
    # Get equivalent profiles after removing 0s. The return value of
    # order_by_prevals is not used; presumably it reorders in place.
    order_by_prevals(cn_matrix, prev_matrix)
    collapsed_prev = collapse_zeros(prev_matrix)
    collapsed_cn = collapse_zeros(cn_matrix)
    return (collapsed_cn, collapsed_prev)
Esempio n. 8
0
def create_profile_matrices(edgelist, histScores):
    """Assemble per-history profiles of copy-number values and prevalences.

    Both result matrices have one row per row of ``histScores`` and one
    column per edge. An edge contributes ``cnval`` to the first matrix and
    its ``prevals`` to the second, in the rows returned by
    ``ecycles.historyids_to_indices`` for its histories.

    Returns:
        Tuple ``(hprofiles, pprofiles)``, each passed through
        ``collapse_zeros`` after a call to ``order_by_prevals``.
    """
    num_rows = histScores.shape[0]
    num_cols = len(edgelist)
    h_mat = np.zeros((num_rows, num_cols))
    p_mat = np.zeros((num_rows, num_cols))
    for column, edge in enumerate(edgelist):
        row_idx = ecycles.historyids_to_indices(edge.histories, histScores)
        h_mat[row_idx, column] = edge.cnval
        p_mat[row_idx, column] = np.array(edge.prevals)
    # Get equivalent profiles after removing 0s. order_by_prevals is called
    # for its effect on the arguments (its result is discarded).
    order_by_prevals(h_mat, p_mat)
    p_mat = collapse_zeros(p_mat)
    h_mat = collapse_zeros(h_mat)
    return (h_mat, p_mat)
Esempio n. 9
0
def get_shuffle_history_scores(histscores, shuftime, simulation):
    """Return the unique score pairs of histories before and after a cutoff.

    History ids (column 0 of ``histscores``) are reduced modulo
    ``ecycles.Global_BINWIDTH`` (presumably the iteration number within a
    run -- TODO confirm) and split into ids below ``shuftime`` and ids at or
    above it.

    Args:
        histscores: 2-D array with history ids in column 0 and the scores of
            interest in columns 1 and 2.
        shuftime: cutoff compared against the reduced history id.
        simulation: when true, only ids greater than 0 are considered;
            otherwise every id is used.

    Returns:
        A two-element list ``[before, after]``; each element is an array of
        the distinct rows of columns 1:3 for that group of histories.
    """
    all_ids = histscores[:, 0]
    # Without this fallback the ids were undefined for non-simulated input.
    hids = all_ids[all_ids > 0] if simulation else all_ids
    itr = np.fmod(hids, ecycles.Global_BINWIDTH)
    results = []
    for part in (hids[itr < shuftime], hids[itr >= shuftime]):
        i = ecycles.historyids_to_indices(part, histscores)
        a = histscores[i, 1:3]
        # View each row as one opaque item so np.unique deduplicates rows.
        b = np.ascontiguousarray(a).view(
            np.dtype((np.void, a.dtype.itemsize * a.shape[1])))
        idx = np.unique(b, return_index=True)[1]
        results.append(a[idx])
    return results
Esempio n. 10
0
def count_earlylate_with_correction(events, historyScores, outfn1, outfn2):
    """Tabulate how often each event falls early or late in its histories.

    Event orders are collected in two (histories x events) matrices: all
    events, and only events whose ``histories`` contain history id 0
    (presumably the true positives of a simulation -- TODO confirm). Each
    row is ranked and scaled by ``(events in row - 1)``. A scaled rank
    below 0.5 counts as early; above 0.5 and up to 1 counts as late.

    Args:
        events: sequence of objects with ``histories``, ``orders``, ``id``
            and ``determineEventType()``.
        historyScores: 2-D array whose row count sizes the matrices; also
            given to ``ecycles.historyids_to_indices``.
        outfn1: output path for the per-event counts.
        outfn2: output path for the per-history event counts of both
            matrices.

    Returns:
        None; output is written to the two files.
    """
    numhists = historyScores.shape[0]
    # Larger than the expected order values; marks absent events.
    mymax = len(events) + 1
    myranks = np.ones((numhists, len(events))) * mymax
    myTPranks = np.ones((numhists, len(events))) * mymax
    simhist = 0
    for j, e in enumerate(events):
        hindices = ecycles.historyids_to_indices(e.histories, historyScores)
        orders = np.array(e.orders, dtype=float)
        myranks[hindices, j] = orders
        # Evaluated once per event: the test is the same for every history.
        if simhist in e.histories:
            myTPranks[hindices, j] = orders
    # Change the orders into ranks. The ranks will be different if we just
    # look at the TP events in a history vs if we include all of the events.
    hlengths = np.sum(myranks < mymax, axis=1).astype('float')
    trueonlylen = np.sum(myTPranks < mymax, axis=1).astype('float')
    np.savetxt(outfn2, np.vstack((hlengths, trueonlylen)).T, fmt="%d",
               delimiter='\t', header="length\tlen_onlytrue")
    # Process the ranks of the events including the whole history.
    # NOTE(review): a history with a single event yields a zero divisor.
    nonempty = hlengths > 0
    cranks = myranks[nonempty, :].argsort().argsort().astype('float')
    cranks = cranks / (hlengths[nonempty][:, None] - 1)
    # Process the ranks for the histories including only the TP events.
    nonempty_tp = trueonlylen > 0
    cTPranks = myTPranks[nonempty_tp, :].argsort().argsort().astype('float')
    cTPranks = cTPranks / (trueonlylen[nonempty_tp][:, None] - 1)
    # Skip history 0 because that's the simulated history.
    # NOTE(review): assumes the first retained row is that history in both
    # matrices -- verify against the id-to-index mapping.
    truth = cranks[0, :]
    cranks = cranks[1:, :]
    cTPranks = cTPranks[1:, :]
    earlycnts = np.sum(cranks < 0.5, axis=0)
    latecnts = np.sum(np.logical_and(cranks > 0.5, cranks <= 1), axis=0)
    tpearlycnts = np.sum(cTPranks < 0.5, axis=0)
    tplatecnts = np.sum(np.logical_and(cTPranks > 0.5, cTPranks <= 1), axis=0)
    totcnts = np.sum(cranks <= 1, axis=0)
    # Write the per-event table to outfn1.
    with open(outfn1, 'w') as outfh1:
        outfh1.write("EventID\tEvent_type\tearly\tlate\tearlyTP\tlateTP\tTotal\tTruth\n")
        for j, e in enumerate(events):
            counts = (earlycnts[j], latecnts[j], tpearlycnts[j],
                      tplatecnts[j], totcnts[j], truth[j])
            outfh1.write("%s\t%s\t%s\n" % (e.id, e.determineEventType(),
                                           "\t".join(map(str, counts))))
Esempio n. 11
0
def get_shuffle_history_scores(histscores, shuftime, simulation):
    """Collect distinct history scores before and after ``shuftime``.

    The ids in column 0 of ``histscores`` are taken modulo
    ``ecycles.Global_BINWIDTH`` (presumably giving the iteration within a
    run -- TODO confirm); ids whose remainder is below ``shuftime`` form the
    first group, the rest the second.

    Args:
        histscores: 2-D array; column 0 holds history ids, columns 1 and 2
            the scores that are returned.
        shuftime: threshold applied to the reduced id.
        simulation: if true, ids that are not greater than 0 are ignored;
            if false, all ids take part.

    Returns:
        List of two arrays (first group, second group) containing the
        distinct rows of ``histscores[:, 1:3]`` for each group.
    """
    if simulation:
        hids = histscores[histscores[:, 0] > 0, 0]
    else:
        # Previously no ids were defined on this path and the call failed.
        hids = histscores[:, 0]
    itr = np.fmod(hids, ecycles.Global_BINWIDTH)
    groups = (hids[itr < shuftime], hids[itr >= shuftime])
    results = []
    for group in groups:
        i = ecycles.historyids_to_indices(group, histscores)
        a = histscores[i, 1:3]
        # Reinterpret every row as a single void item so that np.unique
        # compares whole rows rather than individual numbers.
        row_dtype = np.dtype((np.void, a.dtype.itemsize * a.shape[1]))
        b = np.ascontiguousarray(a).view(row_dtype)
        idx = np.unique(b, return_index=True)[1]
        results.append(a[idx])
    return results
Esempio n. 12
0
def count_early_vs_late(event, historylengths, simulation):
    """Count the histories in which an event occurs early versus late.

    The event's order in a history is divided by that history's length
    (column 1 of ``historylengths``); a ratio of at most 0.5 is "early",
    anything larger is "late".

    Args:
        event: object exposing parallel ``histories`` and ``orders``.
        historylengths: 2-D array with the history length in column 1; also
            passed to ``ecycles.historyids_to_indices``.
        simulation: when true, history id 0 is not counted but sets the
            truth flag instead.

    Returns:
        Tuple ``(early, late, truth)`` where ``truth`` is 1 if the event is
        late in history 0, 0 if it is not, and -1 if that history was not
        seen or ``simulation`` is false.
    """
    hindices = ecycles.historyids_to_indices(event.histories, historylengths)
    histlens = historylengths[hindices, 1]
    early = 0
    late = 0
    truth = -1
    for h, myord, hlen in zip(event.histories, event.orders, histlens):
        # float() forces true division; integer operands would otherwise be
        # floor-divided under Python 2 and never land between 0 and 1.
        mytime = float(myord) / hlen
        if h == 0 and simulation:
            truth = 1 if mytime > 0.5 else 0
        elif mytime <= 0.5:
            early += 1
        else:
            late += 1
    return (early, late, truth)
Esempio n. 13
0
def get_history_lengths(events, histScores, simhist=None, usepreval=False):
    """Return the number of events per history and their scaled ranks.

    Every event writes its per-history value into a (histories x events)
    matrix that starts out filled with a sentinel. Rows are then ranked and
    the ranks divided by ``(events in row - 1)``.

    Args:
        events: sequence of objects with ``histories`` and the value
            sequences ``orders`` and ``prevals`` (assumed to be parallel to
            ``histories`` -- TODO confirm).
        histScores: 2-D array; supplies the number of histories and is
            forwarded to ``ecycles.historyids_to_indices``.
        simhist: kept so existing callers keep working; the result does not
            depend on it.
        usepreval: rank by ``prevals`` if true, by ``orders`` otherwise.

    Returns:
        ``(hlengths, cranks)``: int array of events present per history and
        float array (histories x events) of scaled ranks. Cells of absent
        events sort last within their row.
    """
    numhists = histScores.shape[0]
    # Marks "event not in history"; real values are expected to be smaller.
    mymax = len(events) + 500
    myranks = np.ones((numhists, len(events))) * mymax
    for j, e in enumerate(events):
        hindices = ecycles.historyids_to_indices(e.histories, histScores)
        values = e.prevals if usepreval else e.orders
        myranks[hindices, j] = np.array(values, dtype=float)
    # Change the orders into ranks.
    hlengths = np.sum(myranks < mymax, axis=1).astype('float')
    # Keeps the divisor below away from zero for single-event histories;
    # the int cast on return maps 1.1 back to 1.
    hlengths[hlengths == 1] = 1.1
    # Double argsort converts values to within-row ranks.
    xranks = myranks.argsort(axis=1).argsort(axis=1)
    # NOTE(review): histories without events are divided by -1.
    cranks = xranks.astype('float') / (hlengths[:, None] - 1)
    return (hlengths.astype('int'), cranks)
Esempio n. 14
0
def count_early_vs_late(event, historylengths, simulation):
    """Classify an event as early or late in each of its histories.

    For every history of the event, the event's order is divided by the
    history length taken from column 1 of ``historylengths``. Ratios up to
    and including 0.5 count as early, larger ratios as late.

    Args:
        event: object with parallel ``histories`` and ``orders`` sequences.
        historylengths: 2-D array holding history lengths in column 1; it is
            also given to ``ecycles.historyids_to_indices``.
        simulation: if true, history id 0 only sets the truth flag and is
            left out of the early/late counts.

    Returns:
        ``(early, late, truth)``; ``truth`` is 1 when the event is late in
        history 0, 0 when it is not, and -1 when history 0 was not
        encountered or ``simulation`` is false.
    """
    hindices = ecycles.historyids_to_indices(event.histories, historylengths)
    histlens = historylengths[hindices, 1]
    early = 0
    late = 0
    truth = -1
    for h, myord, hlen in zip(event.histories, event.orders, histlens):
        # Convert first so integer inputs are not floor-divided on Python 2,
        # which would collapse every ratio to 0 or a whole number.
        mytime = float(myord) / hlen
        if h == 0 and simulation:
            if mytime > 0.5:
                truth = 1
            else:
                truth = 0
        elif mytime <= 0.5:
            early += 1
        else:
            late += 1
    return (early, late, truth)