def get_history_lengths(events, histScores, simhist=None, usepreval=False):
    """Rank every event within every history by its order (or prevalence).

    Builds a (num_histories x num_events) matrix of per-history event
    positions, then converts the positions into normalized ranks.

    Args:
        events: sequence of event objects exposing .histories and .orders
            (and .prevals when usepreval is True).
        histScores: 2-D array with one row per history; only shape[0]
            (the history count) is used directly here.
        simhist: kept for interface compatibility only.  The original
            code tested ``simhist in e.histories`` but both branches did
            the same assignment, so the value never affected the result.
        usepreval: when True, rank by e.prevals instead of e.orders.

    Returns:
        (hlengths, cranks): int array of per-history event counts, and a
        float matrix of normalized ranks (cells for events absent from a
        history hold sentinel ranks beyond the history's real length).
    """
    numhists = histScores.shape[0]
    # Sentinel larger than any real order so absent events sort last.
    mymax = len(events) + 500
    myranks = np.ones((numhists, len(events))) * mymax
    for j in range(len(events)):
        e = events[j]
        hindices = ecycles.historyids_to_indices(e.histories, histScores)
        # Dead-code cleanup: the original duplicated this assignment in
        # identical `if simhist in e.histories:` / `else:` branches.
        source = e.prevals if usepreval else e.orders
        myranks[hindices, j] = np.array(source, dtype=float)
    # Count the real (non-sentinel) events in each history.
    hlengths = np.sum(myranks < mymax, axis=1).astype('float')
    # Avoid dividing by zero below for single-event histories.
    hlengths[hlengths == 1] = 1.1
    # Double argsort converts row values into 0-based ranks.
    xord = myranks.argsort(axis=1)
    xranks = xord.argsort(axis=1)
    cranks = xranks.astype('float') / (hlengths[:, None] - 1)
    return (hlengths.astype('int'), cranks)
def make_event_costs_array(event, historyScores, numruns, runlen):
    """Spread one event's mean cost over a (runlen x numruns) grid.

    Cells for histories that do not contain the event stay at -1; cells
    for histories that do contain it receive the mean of the event's
    upper and lower costs.
    """
    # if event.histories==[]:
    # 	event.histories=histseg.listout_ranges(event.histRanges)
    history_ids = np.array(event.histories, dtype=int)
    slots = histseg.historyids_to_indices(history_ids, historyScores)
    flat = -1 * np.ones(runlen * numruns)
    mean_costs = np.mean(np.vstack((event.uppercosts, event.lowercosts)), axis=0)
    flat[slots] = mean_costs
    # Fortran order: consecutive indices run down columns (one run per column).
    return flat.reshape((runlen, numruns), order='F')
def get_hcost_difference(eventA, eventB, simulation, historyScores, shufstart):
    """Compare the relative ordering of two events across shared histories.

    For each history containing both events, tallies whether A precedes B
    ('ab') or B precedes A ('ba'), separately for histories sampled before
    and after the shuffling iteration `shufstart`, recording the mean
    history cost of each group.

    Returns a tab-separated string: truth, then for the shuffled and the
    unshuffled group the abcost, bacost, tot, abcnt, bacnt values.
    truth is -1 (unknown), 0 (tied in the simulated history), 1 (A before
    B in the simulated history 0) or 2 (B before A).
    """
    shuffledat = {'abcost': 0, 'bacost': 0, 'tot': 0, 'abcnt': 0, 'bacnt': 0}
    noshuffdat = {'abcost': 0, 'bacost': 0, 'tot': 0, 'abcnt': 0, 'bacnt': 0}
    truth = -1
    # Mask of eventA's histories that also appear among eventB's histories.
    ai = np.in1d(np.array(eventA.histories), np.array(eventB.histories))
    if sum(ai) > 0:
        hids = np.array(eventA.histories)[ai]
        # hids now has the history ids where both eventA and eventB exist together
        if simulation and 0 in hids:
            # History id 0 is the simulated truth: exclude it from the
            # tallies and use it to decide the true ordering instead.
            hids = hids[hids != 0]
            atruth = eventA.orders[eventA.histories.index(0)]
            btruth = eventB.orders[eventB.histories.index(0)]
            if atruth < btruth:
                truth = 1
            elif btruth < atruth:
                truth = 2
            else:
                truth = 0
        # split hids into those before shuffling and after shuffling.
        (hids1, hids2) = split_hids_before_after_iteration(hids, shufstart)
        for (hids, dat) in [(hids1, noshuffdat), (hids2, shuffledat)]:
            ai = np.in1d(np.array(eventA.histories), hids)
            bi = np.in1d(np.array(eventB.histories), hids)
            # NOTE(review): aorders/borders are compared positionally;
            # this assumes both events list the shared histories in the
            # same relative order -- confirm against the event builder.
            aorders = np.array(eventA.orders)[ai]
            borders = np.array(eventB.orders)[bi]
            # Histories where A comes strictly before B.
            abi = np.array(eventA.histories)[ai][aorders < borders]
            dat['abcnt'] = len(abi)
            if len(abi) > 0:
                hi = ecycles.historyids_to_indices(abi, historyScores)
                dat['abcost'] = np.mean(historyScores[hi, 1:3])
            # Histories where B comes strictly before A.
            bai = np.array(eventA.histories)[ai][borders < aorders]
            dat['bacnt'] = len(bai)
            if len(bai) > 0:
                hi = ecycles.historyids_to_indices(bai, historyScores)
                dat['bacost'] = np.mean(historyScores[hi, 1:3])
            # Histories where the two events are tied.
            eqhids = np.array(eventA.histories)[ai][borders == aorders]
            if len(eqhids) > 0:
                hi = ecycles.historyids_to_indices(eqhids, historyScores)
                # NOTE(review): 'eqcost' is stored but never emitted
                # below ('keys' omits it) -- intentional?
                dat['eqcost'] = np.mean(historyScores[hi, 1:3])
            dat['tot'] = sum(ai)
    keys = ['abcost', 'bacost', 'tot', 'abcnt', 'bacnt']
    mystr = str(truth)
    for dat in [shuffledat, noshuffdat]:
        for k in keys:
            mystr += "\t" + str(dat[k])
    return (mystr)
def get_hcost_difference(eventA, eventB, simulation, historyScores, shufstart):
    """Compare the relative ordering of two events across shared histories.

    Duplicate definition of the function above (the later def shadows the
    earlier one at import time).

    For each history containing both events, tallies whether A precedes B
    ('ab') or B precedes A ('ba'), separately for histories sampled before
    and after the shuffling iteration `shufstart`, recording the mean
    history cost of each group.

    Returns a tab-separated string: truth, then for the shuffled and the
    unshuffled group the abcost, bacost, tot, abcnt, bacnt values.
    truth is -1 (unknown), 0 (tied in the simulated history), 1 (A before
    B in the simulated history 0) or 2 (B before A).
    """
    shuffledat={'abcost': 0, 'bacost': 0, 'tot':0, 'abcnt': 0, 'bacnt':0}
    noshuffdat={'abcost': 0, 'bacost': 0, 'tot':0, 'abcnt': 0, 'bacnt':0}
    truth=-1
    # Mask of eventA's histories that also appear among eventB's histories.
    ai=np.in1d(np.array(eventA.histories), np.array(eventB.histories))
    if sum(ai)>0:
        hids=np.array(eventA.histories)[ai]
        # hids now has the history ids where both eventA and eventB exist together
        if simulation and 0 in hids:
            # History id 0 is the simulated truth: exclude it from the
            # tallies and use it to decide the true ordering instead.
            hids=hids[hids!=0]
            atruth=eventA.orders[eventA.histories.index(0)]
            btruth=eventB.orders[eventB.histories.index(0)]
            if atruth<btruth:
                truth=1
            elif btruth < atruth:
                truth=2
            else:
                truth=0
        # split hids into those before shuffling and after shuffling.
        (hids1, hids2) = split_hids_before_after_iteration(hids, shufstart)
        for (hids, dat) in [(hids1, noshuffdat), (hids2, shuffledat)]:
            ai=np.in1d(np.array(eventA.histories), hids)
            bi=np.in1d(np.array(eventB.histories), hids)
            # NOTE(review): aorders/borders are compared positionally;
            # this assumes both events list the shared histories in the
            # same relative order -- confirm against the event builder.
            aorders=np.array(eventA.orders)[ai]
            borders=np.array(eventB.orders)[bi]
            # Histories where A comes strictly before B.
            abi=np.array(eventA.histories)[ai][aorders<borders]
            dat['abcnt']=len(abi)
            if len(abi)>0:
                hi=ecycles.historyids_to_indices(abi, historyScores)
                dat['abcost']=np.mean(historyScores[hi,1:3])
            # Histories where B comes strictly before A.
            bai=np.array(eventA.histories)[ai][borders<aorders]
            dat['bacnt']=len(bai)
            if len(bai)>0:
                hi=ecycles.historyids_to_indices(bai, historyScores)
                dat['bacost']=np.mean(historyScores[hi,1:3])
            # Histories where the two events are tied.
            eqhids=np.array(eventA.histories)[ai][borders==aorders]
            if len(eqhids)>0:
                hi=ecycles.historyids_to_indices(eqhids, historyScores)
                # NOTE(review): 'eqcost' is stored but never emitted
                # below ('keys' omits it) -- intentional?
                dat['eqcost']=np.mean(historyScores[hi,1:3])
            dat['tot']=sum(ai)
    keys=['abcost', 'bacost', 'tot', 'abcnt', 'bacnt']
    mystr=str(truth)
    for dat in [shuffledat, noshuffdat]:
        for k in keys:
            mystr+="\t"+str(dat[k])
    return(mystr)
def count_earlylate_with_correction(events, historyScores, outfn1, outfn2):
    """Count, per event, the histories that place it early vs. late.

    Builds one rank matrix over all events (myranks) and one over only
    the events present in the simulated history 0 (myTPranks),
    normalizes the ranks by history length, and writes:
      * outfn2: per-history lengths (all events vs. true-positive only);
      * outfn1: per-event early/late counts plus the truth rank.

    BUG FIX: the output file was opened as ``open(outfn, 'w')`` but no
    name `outfn` exists in this scope (NameError at runtime); it now
    uses the `outfn1` parameter the header was meant for, and the file
    is managed with a `with` block so it closes even on error.
    """
    numhists = historyScores.shape[0]
    # Sentinel rank larger than any real order: marks "event absent".
    mymax = len(events) + 1
    myranks = np.ones((numhists, len(events))) * mymax
    myTPranks = np.ones((numhists, len(events))) * mymax
    # sys.stderr.write("myranks: %s, myTPranks %s\n" % (str(myranks.shape), str(myTPranks.shape)))
    simhist = 0  # history id 0 is the simulated (truth) history
    for j in range(len(events)):
        e = events[j]
        #e.histories=ecycles.listout_ranges(e.histRanges)
        hindices = ecycles.historyids_to_indices(e.histories, historyScores)
        for h in range(len(e.histories)):
            i = hindices[h]
            myord = float(e.orders[h])
            myranks[i, j] = myord
            # An event that appears anywhere in the simulated history
            # counts as a true positive in every history it occurs in.
            if simhist in e.histories:
                myTPranks[i, j] = myord
    # change the orders into ranks. The ranks will be different if we just look at the TP events in a history vs if we include all of the events.
    hlengths = np.sum(myranks < mymax, axis=1).astype('float')
    trueonlylen = np.sum(myTPranks < mymax, axis=1).astype('float')
    np.savetxt(outfn2, np.vstack((hlengths, trueonlylen)).T, fmt="%d",
               delimiter='\t', header="length\tlen_onlytrue")
    #process the ranks of the events including the whole history
    xord = myranks[hlengths > 0, :].argsort()
    xranks = xord.argsort()
    cranks = xranks.astype('float')
    hlengths = hlengths[hlengths > 0]
    for i in range(hlengths.shape[0]):
        cranks[i, :] = cranks[i, :] / (hlengths[i] - 1)
    #process the ranks for the histories including only the TP events
    xord = myTPranks[trueonlylen > 0, :].argsort()
    xranks = xord.argsort()
    cTPranks = xranks.astype('float')
    trueonlylen = trueonlylen[trueonlylen > 0]
    for i in range(trueonlylen.shape[0]):
        cTPranks[i, :] = cTPranks[i, :] / (trueonlylen[i] - 1)
    # skip history 0 because that's the simulated history
    # NOTE(review): assumes the simulated history occupies row 0 of the
    # filtered matrices -- confirm historyScores row ordering.
    truth = cranks[0, :]
    cranks = cranks[1:, :]
    cTPranks = cTPranks[1:, :]
    earlycnts = np.sum(cranks < 0.5, axis=0)
    latecnts = np.sum(np.logical_and(cranks > 0.5, cranks <= 1), axis=0)
    tpearlycnts = np.sum(cTPranks < 0.5, axis=0)
    tplatecnts = np.sum(np.logical_and(cTPranks > 0.5, cTPranks <= 1), axis=0)
    totcnts = np.sum(cranks <= 1, axis=0)
    # BUG FIX: was open(outfn, 'w') -- `outfn` is undefined here.
    with open(outfn1, 'w') as outfh1:
        outfh1.write(
            "EventID\tEvent_type\tearly\tlate\tearlyTP\tlateTP\tTotal\tTruth\n")
        for j in range(len(events)):
            e = events[j]
            outfh1.write("%s\t%s\t%s\n" % (e.id, e.determineEventType(), "\t".join(
                map(str, (earlycnts[j], latecnts[j], tpearlycnts[j],
                          tplatecnts[j], totcnts[j], truth[j])))))
def make_event_costs_array(event, historyScores, numruns, runlen):
    """Spread one event's mean cost over a (runlen x numruns) grid.

    Histories lacking the event keep the sentinel value -1; histories
    containing it get the mean of the event's upper and lower costs.
    """
    # if event.histories==[]:
    # 	event.histories=histseg.listout_ranges(event.histRanges)
    ids = np.array(event.histories, dtype=int)
    positions = histseg.historyids_to_indices(ids, historyScores)
    costs = np.full(runlen * numruns, -1.0)
    costs[positions] = np.mean(
        np.vstack((event.uppercosts, event.lowercosts)), axis=0)
    # Column-major reshape: each column corresponds to one run.
    return costs.reshape((runlen, numruns), order='F')
def create_profile_matrices(edgelist, histScores):
    """Build per-history copy-number and prevalence profile matrices.

    Each row is a history and each column an edge; entries stay 0 for
    histories that do not contain the edge.  Profiles are then aligned
    by prevalence and all-zero entries are collapsed.
    """
    num_histories = histScores.shape[0]
    num_edges = len(edgelist)
    hprofiles = np.zeros((num_histories, num_edges))
    pprofiles = np.zeros((num_histories, num_edges))
    col = 0
    for edge in edgelist:
        rows = ecycles.historyids_to_indices(edge.histories, histScores)
        hprofiles[rows, col] = edge.cnval
        pprofiles[rows, col] = np.array(edge.prevals)
        col += 1
    # get equivalent profiles after removing 0s
    order_by_prevals(hprofiles, pprofiles)
    pprofiles = collapse_zeros(pprofiles)
    hprofiles = collapse_zeros(hprofiles)
    return (hprofiles, pprofiles)
def get_shuffle_history_scores(histscores, shuftime, simulation):
    """Split history cost pairs into pre- and post-shuffle groups.

    Only meaningful for simulations: history id 0 (the truth) is
    excluded, the remaining ids are partitioned by iteration number
    (id mod Global_BINWIDTH) around `shuftime`, and the unique
    (column 1, column 2) cost rows of each partition are returned.

    Returns [pre_shuffle_costs, post_shuffle_costs] when `simulation`
    is truthy; otherwise falls through and returns None.
    """
    if simulation:
        history_ids = histscores[histscores[:, 0] > 0, 0]
        iteration = np.fmod(history_ids, ecycles.Global_BINWIDTH)
        before = history_ids[iteration < shuftime]
        after = history_ids[iteration >= shuftime]
        results = []
        for group in (before, after):
            idx = ecycles.historyids_to_indices(group, histscores)
            costs = histscores[idx, 1:3]
            # View each row as a single opaque void blob so np.unique
            # deduplicates whole rows while keeping first occurrences.
            blob = np.ascontiguousarray(costs).view(
                np.dtype((np.void, costs.dtype.itemsize * costs.shape[1])))
            _, keep = np.unique(blob, return_index=True)
            results.append(costs[keep])
        return results
def count_earlylate_with_correction(events, historyScores, outfn1, outfn2):
    """Count, per event, the histories that place it early vs. late.

    Duplicate definition of the function above (the later def shadows
    the earlier one at import time).

    Builds one rank matrix over all events (myranks) and one over only
    the events present in the simulated history 0 (myTPranks),
    normalizes the ranks by history length, and writes:
      * outfn2: per-history lengths (all events vs. true-positive only);
      * outfn1: per-event early/late counts plus the truth rank.

    BUG FIX: the output file was opened as ``open(outfn, 'w')`` but no
    name `outfn` exists in this scope (NameError at runtime); it now
    uses the `outfn1` parameter, managed with a `with` block.
    """
    numhists = historyScores.shape[0]
    # Sentinel rank larger than any real order: marks "event absent".
    mymax = len(events) + 1
    myranks = np.ones((numhists, len(events))) * mymax
    myTPranks = np.ones((numhists, len(events))) * mymax
    # sys.stderr.write("myranks: %s, myTPranks %s\n" % (str(myranks.shape), str(myTPranks.shape)))
    simhist = 0  # history id 0 is the simulated (truth) history
    for j in range(len(events)):
        e = events[j]
        #e.histories=ecycles.listout_ranges(e.histRanges)
        hindices = ecycles.historyids_to_indices(e.histories, historyScores)
        for h in range(len(e.histories)):
            i = hindices[h]
            myord = float(e.orders[h])
            myranks[i, j] = myord
            # An event that appears anywhere in the simulated history
            # counts as a true positive in every history it occurs in.
            if simhist in e.histories:
                myTPranks[i, j] = myord
    # change the orders into ranks. The ranks will be different if we just look at the TP events in a history vs if we include all of the events.
    hlengths = np.sum(myranks < mymax, axis=1).astype('float')
    trueonlylen = np.sum(myTPranks < mymax, axis=1).astype('float')
    np.savetxt(outfn2, np.vstack((hlengths, trueonlylen)).T, fmt="%d",
               delimiter='\t', header="length\tlen_onlytrue")
    #process the ranks of the events including the whole history
    xord = myranks[hlengths > 0, :].argsort()
    xranks = xord.argsort()
    cranks = xranks.astype('float')
    hlengths = hlengths[hlengths > 0]
    for i in range(hlengths.shape[0]):
        cranks[i, :] = cranks[i, :] / (hlengths[i] - 1)
    #process the ranks for the histories including only the TP events
    xord = myTPranks[trueonlylen > 0, :].argsort()
    xranks = xord.argsort()
    cTPranks = xranks.astype('float')
    trueonlylen = trueonlylen[trueonlylen > 0]
    for i in range(trueonlylen.shape[0]):
        cTPranks[i, :] = cTPranks[i, :] / (trueonlylen[i] - 1)
    # skip history 0 because that's the simulated history
    # NOTE(review): assumes the simulated history occupies row 0 of the
    # filtered matrices -- confirm historyScores row ordering.
    truth = cranks[0, :]
    cranks = cranks[1:, :]
    cTPranks = cTPranks[1:, :]
    earlycnts = np.sum(cranks < 0.5, axis=0)
    latecnts = np.sum(np.logical_and(cranks > 0.5, cranks <= 1), axis=0)
    tpearlycnts = np.sum(cTPranks < 0.5, axis=0)
    tplatecnts = np.sum(np.logical_and(cTPranks > 0.5, cTPranks <= 1), axis=0)
    totcnts = np.sum(cranks <= 1, axis=0)
    # BUG FIX: was open(outfn, 'w') -- `outfn` is undefined here.
    with open(outfn1, 'w') as outfh1:
        outfh1.write(
            "EventID\tEvent_type\tearly\tlate\tearlyTP\tlateTP\tTotal\tTruth\n")
        for j in range(len(events)):
            e = events[j]
            outfh1.write("%s\t%s\t%s\n" % (e.id, e.determineEventType(), "\t".join(
                map(str, (earlycnts[j], latecnts[j], tpearlycnts[j],
                          tplatecnts[j], totcnts[j], truth[j])))))
def get_shuffle_history_scores(histscores, shuftime, simulation):
    """Return unique history cost pairs before/after the shuffle point.

    Simulation-only helper: drops history id 0 (the truth), partitions
    the remaining ids by iteration number (id mod Global_BINWIDTH)
    relative to `shuftime`, then deduplicates the (col 1, col 2) cost
    rows of each partition.  Non-simulation calls return None.
    """
    if simulation:
        ids = histscores[histscores[:, 0] > 0, 0]
        iters = np.fmod(ids, ecycles.Global_BINWIDTH)
        partitions = (ids[iters < shuftime], ids[iters >= shuftime])
        out = []
        for part in partitions:
            where = ecycles.historyids_to_indices(part, histscores)
            pair_rows = histscores[where, 1:3]
            # Collapse each row to one void element so row-level
            # uniqueness can be computed with np.unique.
            as_void = np.ascontiguousarray(pair_rows).view(np.dtype(
                (np.void, pair_rows.dtype.itemsize * pair_rows.shape[1])))
            _, first_idx = np.unique(as_void, return_index=True)
            out.append(pair_rows[first_idx])
        return out
def count_early_vs_late(event, historylengths, simulation):
    """Count the histories in which the event occurs early vs. late.

    An event is "early" in a history when its order divided by the
    history length is <= 0.5.  For simulations, history id 0 is the
    truth: it sets `truth` (0 = early, 1 = late) instead of being
    counted.

    Returns:
        (early, late, truth) -- the two counts plus the truth flag
        (-1 when not a simulation or history 0 is absent).
    """
    #event.histories=ecycles.listout_ranges(event.histRanges)
    hindices = ecycles.historyids_to_indices(event.histories, historylengths)
    histlens = historylengths[hindices, 1]
    early = 0
    late = 0
    truth = -1
    for i in range(len(event.histories)):
        h = event.histories[i]
        myord = event.orders[i]
        hlen = histlens[i]
        # BUG FIX: force true division.  Under Python 2, int/int
        # truncated toward zero, which would misclassify nearly every
        # event as early whenever orders and lengths are integers.
        mytime = float(myord) / hlen
        if h == 0 and simulation:
            if mytime > 0.5:
                truth = 1
            else:
                truth = 0
        else:
            if mytime <= 0.5:
                early = early + 1
            else:
                late = late + 1
    return (early, late, truth)
def get_history_lengths(events, histScores, simhist=None, usepreval=False):
    """Rank every event within every history by its order (or prevalence).

    Duplicate definition of the function above (the later def shadows
    the earlier one at import time).

    Args:
        events: sequence of event objects exposing .histories and .orders
            (and .prevals when usepreval is True).
        histScores: 2-D array with one row per history; only shape[0]
            (the history count) is used directly here.
        simhist: kept for interface compatibility only.  The original
            code tested ``simhist in e.histories`` but both branches did
            the same assignment, so the value never affected the result.
        usepreval: when True, rank by e.prevals instead of e.orders.

    Returns:
        (hlengths, cranks): int array of per-history event counts, and a
        float matrix of normalized ranks (cells for events absent from a
        history hold sentinel ranks beyond the history's real length).
    """
    numhists = histScores.shape[0]
    # Sentinel larger than any real order so absent events sort last.
    mymax = len(events) + 500
    myranks = np.ones((numhists, len(events))) * mymax
    for j in range(len(events)):
        e = events[j]
        hindices = ecycles.historyids_to_indices(e.histories, histScores)
        # Dead-code cleanup: the original duplicated this assignment in
        # identical `if simhist in e.histories:` / `else:` branches.
        source = e.prevals if usepreval else e.orders
        myranks[hindices, j] = np.array(source, dtype=float)
    #change the orders into ranks.
    hlengths = np.sum(myranks < mymax, axis=1).astype('float')
    # Avoid dividing by zero below for single-event histories.
    hlengths[hlengths == 1] = 1.1
    # Double argsort converts row values into 0-based ranks.
    xord = myranks.argsort(axis=1)
    xranks = xord.argsort(axis=1)
    cranks = xranks.astype('float') / (hlengths[:, None] - 1)
    return (hlengths.astype('int'), cranks)
def count_early_vs_late(event, historylengths, simulation):
    """Count the histories in which the event occurs early vs. late.

    Duplicate definition of the function above (the later def shadows
    the earlier one at import time).

    An event is "early" in a history when its order divided by the
    history length is <= 0.5.  For simulations, history id 0 is the
    truth: it sets `truth` (0 = early, 1 = late) instead of being
    counted.

    Returns:
        (early, late, truth) -- the two counts plus the truth flag
        (-1 when not a simulation or history 0 is absent).
    """
    #event.histories=ecycles.listout_ranges(event.histRanges)
    hindices = ecycles.historyids_to_indices(event.histories, historylengths)
    histlens = historylengths[hindices, 1]
    early = 0
    late = 0
    truth = -1
    for i in range(len(event.histories)):
        h = event.histories[i]
        myord = event.orders[i]
        hlen = histlens[i]
        # BUG FIX: force true division.  Under Python 2, int/int
        # truncated toward zero, which would misclassify nearly every
        # event as early whenever orders and lengths are integers.
        mytime = float(myord) / hlen
        if h == 0 and simulation:
            if mytime > 0.5:
                truth = 1
            else:
                truth = 0
        else:
            if mytime <= 0.5:
                early = early + 1
            else:
                late = late + 1
    return (early, late, truth)