def zca_approx(data, ksize, ssize):
    """Whiten ``data`` block by block, in place, and return it.

    ``data`` is unpacked as a 4-D array ``(N, H, W, C)``. Blocks of size
    ``ksize = (KX, KY, KZ)`` tile the last three axes; the tiling is repeated
    for every start offset in ``range(0, K, S)`` per axis, where
    ``ssize = (SX, SY, SZ)`` gives the offset strides. A block that would
    extend past the end of any axis is skipped entirely.

    Each block is passed to ``whiten(X=..., method='zca')`` and the result is
    reshaped to the block's shape and written back into ``data``.
    """
    N, H, W, C = np.shape(data)
    KX, KY, KZ = ksize
    SX, SY, SZ = ssize

    for off_x in range(0, KX, SX):
        for off_y in range(0, KY, SY):
            for off_z in range(0, KZ, SZ):
                for x_lo in range(off_x, H + off_x, KX):
                    x_hi = x_lo + KX
                    for y_lo in range(off_y, W + off_y, KY):
                        y_hi = y_lo + KY
                        for z_lo in range(off_z, C + off_z, KZ):
                            z_hi = z_lo + KZ

                            # Only full-size blocks are whitened.
                            fits = x_hi <= H and y_hi <= W and z_hi <= C
                            if not fits:
                                continue

                            print(x_lo, y_lo, z_lo)
                            block = whiten(
                                X=data[:, x_lo:x_hi, y_lo:y_hi, z_lo:z_hi],
                                method='zca')
                            # Full blocks always span exactly (KX, KY, KZ).
                            block = np.reshape(block, (N, KX, KY, KZ))
                            data[:, x_lo:x_hi, y_lo:y_hi, z_lo:z_hi] = block

    return data
def zca_approx(data, ksize, ssize):
    """Whiten ``data`` block by block, in place, and return it.

    ``data`` is unpacked as a 4-D array ``(N, H, W, C)``. Blocks of size
    ``ksize = (KX, KY, KZ)`` tile the last three axes; the tiling is repeated
    for every start offset in ``range(0, K, S)`` per axis, where
    ``ssize = (SX, SY, SZ)`` gives the offset strides. Blocks that reach past
    the end of an axis are clipped to the array bounds.

    Each block is passed to ``whiten(X=..., method='zca')`` and the result is
    reshaped to the (possibly clipped) block shape and written back.

    Returns:
        The same ``data`` array, modified in place.
    """
    N, H, W, C = np.shape(data)
    KX, KY, KZ = ksize
    SX, SY, SZ = ssize

    for sx in range(0, KX, SX):
        for sy in range(0, KY, SY):
            for sz in range(0, KZ, SZ):
                # Stop each axis at the array edge: a start index at or past
                # the edge would only yield an empty (clipped-to-nothing) block.
                for x in range(sx, H, KX):
                    for y in range(sy, W, KY):
                        for z in range(sz, C, KZ):
                            print(x, y, z)
                            x1 = x
                            x2 = min(x + KX, H)
                            y1 = y
                            y2 = min(y + KY, W)
                            z1 = z
                            z2 = min(z + KZ, C)

                            # Operate on the argument, not on a module-level
                            # array that happens to be in scope.
                            white = whiten(X=data[:, x1:x2, y1:y2, z1:z2],
                                           method='zca')
                            white = np.reshape(
                                white, (N, x2 - x1, y2 - y1, z2 - z1))
                            data[:, x1:x2, y1:y2, z1:z2] = white

    return data
def get_patches(X, patch_shape, patch_start, patch_num):
    """Sample random spatial patches from ``X`` and whiten them.

    Args:
        X: array indexed as ``X[example, row, col, channel]``.
        patch_shape: ``(PH, PW, PC)``. All channels of ``X`` are taken, so
            ``PC`` must equal the channel count of ``X``.
        patch_start: index of the first example to draw from; patch ``ii``
            comes from example ``(patch_start + ii)`` modulo the number of
            examples in ``X``.
        patch_num: number of patches to extract.

    Returns:
        The result of ``whiten(patches)`` reshaped to ``(-1, PH, PW, PC)``.
    """
    PH, PW, PC = patch_shape
    # Use the array's own dimensions rather than module-level constants.
    num_examples, height, width = np.shape(X)[:3]

    patches = np.zeros(shape=(patch_num, PH, PW, PC))
    for ii in range(patch_num):
        idx = (patch_start + ii) % num_examples
        # randint's upper bound is exclusive; +1 makes the last valid offset
        # reachable and allows a patch as large as the image itself.
        h = np.random.randint(height - PH + 1)
        w = np.random.randint(width - PW + 1)
        # All channels are taken; no random channel offset.
        patches[ii] = X[idx, h:h + PH, w:w + PW, :]

    patches = whiten(patches)
    patches = np.reshape(patches, (-1, PH, PW, PC))
    return patches
def dncuts(A, NVEC, N_DOWNSAMPLE, DECIMATE, SZ):
    """Approximate normalized-cuts eigenvectors via a downsampled affinity.

    The affinity matrix is repeatedly decimated on the image grid, ``ncuts``
    is solved on the small matrix, and the eigenvectors are carried back to
    full resolution through the stored interpolation matrices.

    Args:
        A: affinity matrix.
        NVEC: number of eigenvectors requested from ``ncuts``.
        N_DOWNSAMPLE: number of downsampling operations.
        DECIMATE: decimation factor applied per downsampling operation.
        SZ: size of the image corresponding to ``A``; only the first two
            entries are used.

    Returns:
        ``(EV, EVal)``: the upsampled, whitened eigenvectors and the rescaled
        eigenvalues.
    """
    A_down = A
    SZ_down = np.array(SZ, dtype=np.int64)[:2]
    Bs = {}

    for di in range(N_DOWNSAMPLE):
        # Grid coordinates of every row of the current affinity matrix.
        (j, i) = np.unravel_index(range(A_down.shape[0]), SZ_down)
        do_keep = np.logical_and((i % DECIMATE == 0), (j % DECIMATE == 0))
        do_keep_idx = np.argwhere(do_keep).flatten()

        A_sub = (A_down[:, do_keep_idx]).T
        # eps guards against division by zero for empty columns.
        d = np.sum(A_sub, 0) + (np.finfo(float).eps)
        B = (A_sub / d).T
        A_down = (A_sub.dot(B)).T

        # The 0-based mask above keeps indices 0, DECIMATE, 2*DECIMATE, ...,
        # i.e. ceil(n / DECIMATE) samples per axis. Track exactly that size so
        # the next unravel_index matches A_down (a floor(n / 2) update breaks
        # for odd sizes and for DECIMATE != 2). Integer ceil-division keeps
        # this independent of true-division semantics.
        SZ_down = np.array(-(-SZ_down // DECIMATE), dtype=np.int64)
        Bs[di] = B

    A_down = sparse.csr_matrix(A_down)
    EV, EVal = ncuts(A_down, NVEC)

    # Upsample the eigenvectors through the stored matrices, coarsest first.
    # NOTE(review): `*` is a matrix product only if Bs[di] is a matrix/sparse
    # type - confirm the type of A passed by callers.
    for di in range(N_DOWNSAMPLE - 1, -1, -1):
        EV = Bs[di] * EV

    # NOTE(review): the eigenvalue rescale uses a fixed base of 2 regardless
    # of DECIMATE - confirm whether that is intended.
    EVal = (2 ** -N_DOWNSAMPLE) * EVal
    EV = whiten(EV, 1, 0)
    return EV, EVal
def main():
    """Process every pending 'CC' (cross-correlation) job in the database.

    Reads the processing configuration from the DB, then loops while
    ``is_next_job`` reports pending work. For each batch of jobs it
    preprocesses the traces of the day, cuts them into windows of
    ``corr_duration`` seconds (stepped according to ``overlap``), clips,
    tapers and whitens each window per filter, cross-correlates the station
    pairs with ``myCorr`` and stores the results through ``export_allcorr``
    (``keep_all``) and/or ``add_corr`` (``keep_days``). Each processed pair is
    flagged done with ``update_job``.

    Calls ``sys.exit()`` when no filter is defined.
    """
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s [%(levelname)s] %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')
    logging.info('*** Starting: Compute CC ***')

    # Connection to the DB
    db = connect()

    if len(get_filters(db, all=False)) == 0:
        logging.info("NO FILTERS DEFINED, exiting")
        sys.exit()

    # Get Configuration
    params = Params()
    params.goal_sampling_rate = float(get_config(db, "cc_sampling_rate"))
    params.goal_duration = float(get_config(db, "analysis_duration"))
    params.overlap = float(get_config(db, "overlap"))
    params.maxlag = float(get_config(db, "maxlag"))
    # Window length expressed in samples.
    params.min30 = float(get_config(
        db, "corr_duration")) * params.goal_sampling_rate
    params.windsorizing = float(get_config(db, "windsorizing"))
    params.resampling_method = get_config(db, "resampling_method")
    params.decimation_factor = int(get_config(db, "decimation_factor"))
    params.preprocess_lowpass = float(get_config(db, "preprocess_lowpass"))
    params.preprocess_highpass = float(get_config(db, "preprocess_highpass"))
    params.keep_all = get_config(db, 'keep_all', isbool=True)
    params.keep_days = get_config(db, 'keep_days', isbool=True)
    params.components_to_compute = get_components_to_compute(db)

    logging.info("Will compute %s" % " ".join(params.components_to_compute))

    while is_next_job(db, jobtype='CC'):
        jobs = get_next_job(db, jobtype='CC')
        stations = []
        pairs = []
        refs = []

        for job in jobs:
            refs.append(job.ref)
            pairs.append(job.pair)
            netsta1, netsta2 = job.pair.split(':')
            stations.append(netsta1)
            stations.append(netsta2)
            goal_day = job.day

        stations = np.unique(stations)

        logging.info("New CC Job: %s (%i pairs with %i stations)" %
                     (goal_day, len(pairs), len(stations)))
        jt = time.time()

        xlen = int(params.goal_duration * params.goal_sampling_rate)

        # Horizontal traces are only needed when an R or T component is asked.
        if ''.join(params.components_to_compute).count('R') > 0 or ''.join(
                params.components_to_compute).count('T') > 0:
            comps = ['Z', 'E', 'N']
            tramef_Z = np.zeros((len(stations), xlen))
            tramef_E = np.zeros((len(stations), xlen))
            tramef_N = np.zeros((len(stations), xlen))
            basetime, tramef_Z, tramef_E, tramef_N = preprocess(
                db, stations, comps, goal_day, params,
                tramef_Z, tramef_E, tramef_N)
        else:
            comps = ['Z']
            tramef_Z = np.zeros((len(stations), xlen))
            basetime, tramef_Z = preprocess(db, stations, comps, goal_day,
                                            params, tramef_Z)

        # print '##### STREAMS ARE ALL PREPARED AT goal Hz #####'
        dt = 1. / params.goal_sampling_rate

        # Window start/end sample indices; `i` is the window start in seconds.
        begins = []
        ends = []
        i = 0
        while i <= (params.goal_duration -
                    params.min30 / params.goal_sampling_rate):
            begins.append(int(i * params.goal_sampling_rate))
            ends.append(int(i * params.goal_sampling_rate + params.min30))
            i += int(params.min30 / params.goal_sampling_rate *
                     (1.0 - params.overlap))
        slices = len(begins)

        #
        # Computing only ZZ components ? Then we can be much faster:
        #
        if len(params.components_to_compute) == 1 and \
                params.components_to_compute[0] == "ZZ":
            Nfft = params.min30
            if params.min30 / 2 % 2 != 0:
                Nfft = params.min30 + 2
            cp = cosTaper(int(params.min30), 0.04)

            logging.info("Pre-Whitening Traces")
            # Builtin `complex` replaces the alias `np.complex`, which has
            # been removed from NumPy.
            whitened_slices = np.zeros(
                (len(stations), len(get_filters(db, all=False)),
                 slices, int(Nfft)),
                dtype=complex)
            for istation, station in enumerate(stations):
                for islice, (begin, end) in enumerate(zip(begins, ends)):
                    # Copy: a plain slice is a view, and the in-place clipping
                    # and tapering below would otherwise alter the samples
                    # shared with overlapping windows.
                    tmp = tramef_Z[istation, begin:end].copy()
                    rmsmat = np.std(np.abs(tmp))
                    if params.windsorizing == -1:
                        tmp = np.sign(tmp)
                    elif params.windsorizing != 0:
                        indexes = np.where(
                            np.abs(tmp) > (params.windsorizing * rmsmat))[0]
                        # clipping at windsorizing*rms
                        tmp[indexes] = (tmp[indexes] / np.abs(
                            tmp[indexes])) * params.windsorizing * rmsmat
                    tmp *= cp
                    for ifilter, filterdb in enumerate(
                            get_filters(db, all=False)):
                        whitened_slices[istation, ifilter, islice, :] = \
                            whiten(tmp, Nfft, dt, float(filterdb.low),
                                   float(filterdb.high), plot=False)
                    del tmp
            del tramef_Z

            logging.info("Processing CC")
            for ifilter, filterdb in enumerate(get_filters(db, all=False)):
                for pair in pairs:
                    orig_pair = pair
                    if params.keep_all:
                        allcorr = {}
                    if params.keep_days:
                        daycorr = np.zeros(get_maxlag_samples(db, ))
                        ndaycorr = 0
                    station1, station2 = pair.split(':')
                    # From here on `pair` holds the two station row indices.
                    pair = (np.where(stations == station1)[0][0],
                            np.where(stations == station2)[0][0])
                    for islice in range(slices):
                        tmp = np.vstack(
                            (whitened_slices[pair[0], ifilter, islice],
                             whitened_slices[pair[1], ifilter, islice]))
                        corr = myCorr(tmp, np.ceil(params.maxlag / dt),
                                      plot=False)
                        tmptime = time.gmtime(
                            basetime +
                            begins[islice] / params.goal_sampling_rate)
                        thisdate = time.strftime("%Y-%m-%d", tmptime)
                        thistime = time.strftime("%Y-%m-%d %H:%M:%S", tmptime)
                        if not np.any(np.isnan(corr)) and not np.any(
                                np.isinf(corr)):
                            if params.keep_all:
                                ccfid = "%s_%s_%s_%s_%s" % (
                                    station1, station2, filterdb.ref, 'ZZ',
                                    thisdate)
                                if ccfid not in allcorr:
                                    allcorr[ccfid] = {}
                                allcorr[ccfid][thistime] = corr

                            if params.keep_days:
                                daycorr += corr
                                ndaycorr += 1

                    if params.keep_all:
                        for ccfid in allcorr.keys():
                            export_allcorr(db, ccfid, allcorr[ccfid])

                    if params.keep_days:
                        thisdate = time.strftime("%Y-%m-%d",
                                                 time.gmtime(basetime))
                        thistime = time.strftime("%H_%M",
                                                 time.gmtime(basetime))
                        add_corr(db, station1.replace('.', '_'),
                                 station2.replace('.', '_'), filterdb.ref,
                                 thisdate, thistime,
                                 params.min30 / params.goal_sampling_rate,
                                 'ZZ', daycorr, params.goal_sampling_rate,
                                 day=True, ncorr=ndaycorr)
                    update_job(db, goal_day, orig_pair, 'CC', 'D')
            logging.info("Job Finished. It took %.2f seconds" %
                         (time.time() - jt))

        else:
            # ITERATING OVER PAIRS #####
            for pair in pairs:
                orig_pair = pair

                logging.info('Processing pair: %s' % pair.replace(':', ' vs '))
                tt = time.time()
                station1, station2 = pair.split(':')
                # From here on `pair` holds the two station row indices.
                pair = (np.where(stations == station1)[0][0],
                        np.where(stations == station2)[0][0])

                s1 = get_station(db, station1.split('.')[0],
                                 station1.split('.')[1])
                s2 = get_station(db, station2.split('.')[0],
                                 station2.split('.')[1])

                if s1.X:
                    X0 = s1.X
                    Y0 = s1.Y
                    c0 = s1.coordinates

                    X1 = s2.X
                    Y1 = s2.Y
                    c1 = s2.coordinates

                    if c0 == c1:
                        coordinates = c0
                    else:
                        coordinates = 'MIX'

                    cplAz = np.deg2rad(azimuth(coordinates, X0, Y0, X1, Y1))
                    logging.debug("Azimuth=%.1f" % np.rad2deg(cplAz))
                else:
                    # No coordinates found: skip the azimuth calculation.
                    cplAz = 0.

                for components in params.components_to_compute:
                    # Cheap emptiness test on raw traces (E stands in for the
                    # horizontals) before any rotation is computed.
                    if components == "ZZ":
                        t1 = tramef_Z[pair[0]]
                        t2 = tramef_Z[pair[1]]
                    elif components[0] == "Z":
                        t1 = tramef_Z[pair[0]]
                        t2 = tramef_E[pair[1]]
                    elif components[1] == "Z":
                        t1 = tramef_E[pair[0]]
                        t2 = tramef_Z[pair[1]]
                    else:
                        t1 = tramef_E[pair[0]]
                        t2 = tramef_E[pair[1]]
                    if np.all(t1 == 0) or np.all(t2 == 0):
                        logging.debug("%s contains empty trace(s), skipping"
                                      % components)
                        continue
                    del t1, t2

                    # Build the actual traces; R and T are rotated from N/E
                    # when an azimuth is available.
                    if components[0] == "Z":
                        t1 = tramef_Z[pair[0]]
                    elif components[0] == "R":
                        if cplAz != 0:
                            t1 = tramef_N[pair[0]] * np.cos(cplAz) +\
                                tramef_E[pair[0]] * np.sin(cplAz)
                        else:
                            t1 = tramef_E[pair[0]]
                    elif components[0] == "T":
                        if cplAz != 0:
                            t1 = tramef_N[pair[0]] * np.sin(cplAz) -\
                                tramef_E[pair[0]] * np.cos(cplAz)
                        else:
                            t1 = tramef_N[pair[0]]

                    if components[1] == "Z":
                        t2 = tramef_Z[pair[1]]
                    elif components[1] == "R":
                        if cplAz != 0:
                            t2 = tramef_N[pair[1]] * np.cos(cplAz) +\
                                tramef_E[pair[1]] * np.sin(cplAz)
                        else:
                            t2 = tramef_E[pair[1]]
                    elif components[1] == "T":
                        if cplAz != 0:
                            t2 = tramef_N[pair[1]] * np.sin(cplAz) -\
                                tramef_E[pair[1]] * np.cos(cplAz)
                        else:
                            t2 = tramef_N[pair[1]]

                    trames = np.vstack((t1, t2))
                    del t1, t2

                    daycorr = {}
                    ndaycorr = {}
                    allcorr = {}
                    for filterdb in get_filters(db, all=False):
                        filterid = filterdb.ref
                        daycorr[filterid] = np.zeros(get_maxlag_samples(db, ))
                        ndaycorr[filterid] = 0

                    for islice, (begin, end) in enumerate(zip(begins, ends)):
                        # Copy: the window is demeaned and clipped in place
                        # below, which must not leak into overlapping windows.
                        trame2h = trames[:, begin:end].copy()

                        rmsmat = np.std(np.abs(trame2h), axis=1)
                        for filterdb in get_filters(db, all=False):
                            filterid = filterdb.ref
                            low = float(filterdb.low)
                            high = float(filterdb.high)
                            rms_threshold = filterdb.rms_threshold

                            Nfft = int(params.min30)
                            if params.min30 / 2 % 2 != 0:
                                Nfft = params.min30 + 2

                            trames2hWb = np.zeros((2, int(Nfft)),
                                                  dtype=complex)
                            skip = False
                            for i, station in enumerate(pair):
                                if rmsmat[i] > rms_threshold:
                                    cp = cosTaper(len(trame2h[i]), 0.04)
                                    trame2h[i] -= trame2h[i].mean()

                                    if params.windsorizing == -1:
                                        trame2h[i] = np.sign(trame2h[i])
                                    elif params.windsorizing != 0:
                                        indexes = np.where(
                                            np.abs(trame2h[i]) > (
                                                params.windsorizing *
                                                rmsmat[i]))[0]
                                        # clipping at windsorizing*rms
                                        trame2h[i][indexes] = (
                                            trame2h[i][indexes] /
                                            np.abs(trame2h[i][indexes])
                                        ) * params.windsorizing * rmsmat[i]

                                    trames2hWb[i] = whiten(
                                        trame2h[i] * cp, Nfft, dt, low, high,
                                        plot=False)
                                else:
                                    trames2hWb[i] = np.zeros(int(Nfft))
                                    skip = True
                                    logging.debug('Slice is Zeros!')

                            if not skip:
                                corr = myCorr(trames2hWb,
                                              np.ceil(params.maxlag / dt),
                                              plot=False)
                                tmptime = time.gmtime(
                                    basetime +
                                    begin / params.goal_sampling_rate)
                                thisdate = time.strftime("%Y-%m-%d", tmptime)
                                thistime = time.strftime(
                                    "%Y-%m-%d %H:%M:%S", tmptime)
                                if params.keep_all:
                                    ccfid = "%s_%s_%s_%s_%s" % (
                                        station1, station2, filterid,
                                        components, thisdate)
                                    if ccfid not in allcorr:
                                        allcorr[ccfid] = {}
                                    allcorr[ccfid][thistime] = corr

                                if params.keep_days:
                                    if not np.any(np.isnan(corr)) and \
                                            not np.any(np.isinf(corr)):
                                        daycorr[filterid] += corr
                                        ndaycorr[filterid] += 1

                                del corr, thistime, trames2hWb

                    if params.keep_all:
                        for ccfid in allcorr.keys():
                            export_allcorr(db, ccfid, allcorr[ccfid])

                    if params.keep_days:
                        # Best effort: a failure to store the daily stack is
                        # logged and does not abort the job.
                        try:
                            for filterdb in get_filters(db, all=False):
                                filterid = filterdb.ref
                                corr = daycorr[filterid]
                                ncorr = ndaycorr[filterid]
                                if ncorr > 0:
                                    logging.debug(
                                        "Saving daily CCF for filter %02i, comp %s (stack of %02i CCF)" % (filterid, components, ncorr))
                                    thisdate = time.strftime(
                                        "%Y-%m-%d", time.gmtime(basetime))
                                    thistime = time.strftime(
                                        "%H_%M", time.gmtime(basetime))
                                    add_corr(
                                        db, station1.replace('.', '_'),
                                        station2.replace('.', '_'), filterid,
                                        thisdate, thistime,
                                        params.min30 /
                                        params.goal_sampling_rate,
                                        components, corr,
                                        params.goal_sampling_rate,
                                        day=True, ncorr=ncorr)
                                del corr, ncorr
                        except Exception as e:
                            logging.debug(str(e))
                    del trames, daycorr, ndaycorr

                logging.debug("Updating Job")
                update_job(db, goal_day, orig_pair, 'CC', 'D')

                logging.info(
                    "Finished processing this pair. It took %.2f seconds" %
                    (time.time() - tt))
            logging.info("Job Finished. It took %.2f seconds" %
                         (time.time() - jt))
    logging.info('*** Finished: Compute CC ***')
y1 = y y2 = min(y + y_step, 32) z1 = z z2 = min(z + y_step, 3) # white = whiten(X=x_train[:, x1:x2, y1:y2, z1:z2], method='zca') # white = np.reshape(white, (50000, x2-x1, y2-y1, z2-z1)) # x_train[:, x1:x2, y1:y2, z1:z2] = white white = whiten(X=x_train[:, x1:x2, y1:y2, :], method='zca') white = np.reshape(white, (50000, x2-x1, y2-y1, 3)) x_train[:, x1:x2, y1:y2, :] = white ''' x_train = whiten(x_train) x_train = np.reshape(x_train, (TRAIN_EXAMPLES, H, W, C)) patches = get_patches(X=x_train, patch_shape=(6, 6, 3), patch_num=400000) patches = np.reshape(patches, (400000, 6 * 6 * 3)) ########################################### centroids = kmeans(patches=patches, patch_shape=(6, 6, 3), patch_num=400000, centroid_num=128, iterations=10) filters = np.reshape(centroids, (128, 6, 6, 3)) filters = np.transpose(filters, (1, 2, 3, 0)) viz('filters', filters)
def __init__(self, data, k):
    """Store the result of ``whiten(data, k)`` on the instance as ``whitened``."""
    whitened = whiten(data, k)
    self.whitened = whitened
def main():
    """Process every pending 'SC' job: per-station cross-component correlation.

    Reads the processing configuration from the DB, then loops while
    ``is_next_job`` reports pending work. For each batch of jobs it
    preprocesses the Z, E and N traces of every station of the day, cuts them
    into windows of ``corr_duration`` seconds (stepped according to
    ``overlap``), clips, tapers and whitens each window per filter, and
    correlates the component pairs (``Z:E``, ``Z:N``, ``N:E``) of the same
    station with ``myCorr``. Results are stored through ``export_allcorr``
    (``keep_all``) and/or ``add_corr`` (``keep_days``), and each station is
    flagged done with ``update_job``.

    A window whose RMS does not exceed the filter's ``rms_threshold`` marks
    the (station, pair, day) as bad data, which is recorded in a text file
    under ``BAD/<station>/``.

    Calls ``sys.exit()`` when no filter is defined.
    """
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s [%(levelname)s] %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')
    logging.info('*** Starting: Compute SC ***')

    # Connection to the DB
    db = connect()

    # Rule out absence of filters
    if len(get_filters(db, all=False)) == 0:
        logging.info("NO FILTERS DEFINED, exiting")
        sys.exit()

    # Get Configuration
    params = Params()
    params.goal_sampling_rate = float(get_config(db, "cc_sampling_rate"))
    params.goal_duration = float(get_config(db, "analysis_duration"))
    params.overlap = float(get_config(db, "overlap"))
    params.maxlag = float(get_config(db, "maxlag"))
    # Window length expressed in samples.
    params.min30 = float(get_config(db, "corr_duration")) * \
        params.goal_sampling_rate
    params.windsorizing = float(get_config(db, "windsorizing"))
    params.resampling_method = get_config(db, "resampling_method")
    params.decimation_factor = int(get_config(db, "decimation_factor"))
    params.preprocess_lowpass = float(get_config(db, "preprocess_lowpass"))
    params.preprocess_highpass = float(get_config(db, "preprocess_highpass"))
    params.keep_all = get_config(db, 'keep_all', isbool=True)
    params.keep_days = get_config(db, 'keep_days', isbool=True)
    params.components_to_compute = ['Z', 'E', 'N']

    logging.info("Will compute %s" % " ".join(params.components_to_compute))

    # Extract all stations
    stations_to_analyse = ["%s.%s" % (sta.net, sta.sta)
                           for sta in get_stations(db, all=True)]

    # One "A:B" entry per unordered component pair; the string comparison
    # keeps a single orientation of each pair (Z:E, Z:N, N:E).
    pairs = np.array([':'.join([comp, newcomp])
                      for comp in params.components_to_compute
                      for newcomp in params.components_to_compute
                      if comp > newcomp])

    while is_next_job(db, jobtype='SC'):
        jobs = get_next_job(db, jobtype='SC')
        stations = []
        refs = []

        # For SC jobs the job's `pair` field is read as a single station.
        for job in jobs:
            refs.append(job.ref)
            netsta = job.pair
            stations.append(netsta)
            goal_day = job.day

        stations = np.unique(stations)

        logging.info("New SC Job: %s (%i pairs with %i stations)" %
                     (goal_day, len(pairs) * len(stations), len(stations)))
        jt = time.time()

        xlen = int(params.goal_duration * params.goal_sampling_rate)

        comps = ['Z', 'E', 'N']
        tramef_Z = np.zeros((len(stations), xlen))
        tramef_E = np.zeros((len(stations), xlen))
        tramef_N = np.zeros((len(stations), xlen))
        basetime, tramef_Z, tramef_E, tramef_N = preprocess(
            db, stations, comps, goal_day, params,
            tramef_Z, tramef_E, tramef_N)
        traces_by_comp = {'Z': tramef_Z, 'E': tramef_E, 'N': tramef_N}

        dt = 1. / params.goal_sampling_rate

        # Window start/end sample indices; `i` is the window start in seconds.
        begins = []
        ends = []
        i = 0
        while i <= (params.goal_duration -
                    params.min30 / params.goal_sampling_rate):
            begins.append(int(i * params.goal_sampling_rate))
            ends.append(int(i * params.goal_sampling_rate + params.min30))
            i += int(params.min30 / params.goal_sampling_rate *
                     (1.0 - params.overlap))
        slices = len(begins)

        for station in stations:
            orig_pair = station
            for pair in pairs:
                logging.info("Processing pair %s for station %s - %s" %
                             (pair, station, goal_day))
                tt = time.time()
                comp1, comp2 = pair.split(':')
                components = comp1 + comp2

                # Row selector for this station; indexing with the np.where
                # tuple keeps a leading axis of length 1.
                station_to_analyse = np.where(stations == station)
                tr1 = traces_by_comp[comp1][station_to_analyse]
                tr2 = traces_by_comp[comp2][station_to_analyse]

                if np.all(tr1 == 0) or np.all(tr2 == 0):
                    logging.debug("%s contains empty trace(s), skipping"
                                  % components)
                    continue

                trames = np.vstack((tr1, tr2))
                del tr1, tr2

                daycorr = {}
                ndaycorr = {}
                allcorr = {}
                for filterdb in get_filters(db, all=False):
                    filterid = filterdb.ref
                    daycorr[filterid] = np.zeros(get_maxlag_samples(db,))
                    ndaycorr[filterid] = 0

                baddata = False
                for islice, (begin, end) in enumerate(zip(begins, ends)):
                    # Copy: the window is demeaned and clipped in place
                    # below, which must not leak into overlapping windows.
                    trame2h = trames[:, begin:end].copy()

                    rmsmat = np.std(trame2h, axis=1)
                    for filterdb in get_filters(db, all=False):
                        filterid = filterdb.ref
                        low = float(filterdb.low)
                        high = float(filterdb.high)
                        rms_threshold = filterdb.rms_threshold

                        Nfft = int(params.min30)
                        if params.min30 / 2 % 2 != 0:
                            Nfft = params.min30 + 2

                        # Builtin `complex` replaces the alias `np.complex`,
                        # which has been removed from NumPy.
                        trames2hWb = np.zeros((2, int(Nfft)), dtype=complex)
                        skip = False
                        for i, comp in enumerate(pair.split(':')):
                            if rmsmat[i] > rms_threshold:
                                cp = cosTaper(len(trame2h[i]), 0.04)
                                trame2h[i] -= trame2h[i].mean()

                                if params.windsorizing == -1:
                                    trame2h[i] = np.sign(trame2h[i])
                                elif params.windsorizing != 0:
                                    indexes = np.where(
                                        np.abs(trame2h[i]) > (
                                            params.windsorizing *
                                            rmsmat[i]))[0]
                                    # clipping at windsorizing*rms
                                    trame2h[i][indexes] = (
                                        trame2h[i][indexes] /
                                        np.abs(trame2h[i][indexes])
                                    ) * params.windsorizing * rmsmat[i]

                                trames2hWb[i] = whiten(
                                    trame2h[i] * cp, Nfft, dt, low, high,
                                    plot=False)
                            else:
                                trames2hWb[i] = np.zeros(int(Nfft))
                                skip = True
                                logging.debug('Slice is Zeros!')

                        if not skip:
                            corr = myCorr(trames2hWb,
                                          np.ceil(params.maxlag / dt),
                                          plot=False)
                            tmptime = time.gmtime(
                                basetime + begin / params.goal_sampling_rate)
                            thisdate = time.strftime("%Y-%m-%d", tmptime)
                            thistime = time.strftime("%Y-%m-%d %H:%M:%S",
                                                     tmptime)
                            if params.keep_all:
                                ccfid = "%s_%s_%s_%s_%s" % (
                                    station, station, filterid, components,
                                    thisdate)
                                if ccfid not in allcorr:
                                    allcorr[ccfid] = {}
                                allcorr[ccfid][thistime] = corr

                            if params.keep_days:
                                if not np.any(np.isnan(corr)) and \
                                        not np.any(np.isinf(corr)):
                                    daycorr[filterid] += corr
                                    ndaycorr[filterid] += 1

                            del corr, thistime, trames2hWb
                        else:
                            baddata = True

                if baddata:
                    # The message needs placeholders: extra positional
                    # arguments without them make logging report a formatting
                    # error instead of emitting the record.
                    logging.info("Bad data: %s %s %s", pair, station, goal_day)
                    badfolder = os.path.join("BAD", "%s" % (station))
                    output = np.array([station, pair, goal_day])
                    if not os.path.isdir(badfolder):
                        logging.info("Creating dir for %s", output)
                        os.makedirs(badfolder)
                    badfile = os.path.join(badfolder,
                                           "%s.txt" % str(goal_day))
                    np.savetxt(badfile, output, delimiter=';', fmt="%s")

                if params.keep_all:
                    for ccfid in allcorr.keys():
                        export_allcorr(db, ccfid, allcorr[ccfid])

                if params.keep_days:
                    # Best effort: a failure to store the daily stack is
                    # logged and does not abort the job.
                    try:
                        for filterdb in get_filters(db, all=False):
                            filterid = filterdb.ref
                            corr = daycorr[filterid]
                            ncorr = ndaycorr[filterid]
                            if ncorr > 0:
                                logging.debug(
                                    "Saving daily CCF for filter %02i, comp %s (stack of %02i CCF)" % (filterid, components, ncorr))
                                thisdate = time.strftime(
                                    "%Y-%m-%d", time.gmtime(basetime))
                                thistime = time.strftime(
                                    "%H_%M", time.gmtime(basetime))
                                # Both "stations" of the correlation are the
                                # same station.
                                add_corr(
                                    db, station.replace('.', '_'),
                                    station.replace('.', '_'), filterid,
                                    thisdate, thistime,
                                    params.min30 / params.goal_sampling_rate,
                                    components, corr,
                                    params.goal_sampling_rate,
                                    day=True, ncorr=ncorr)
                            del corr, ncorr
                    except Exception as e:
                        logging.debug(str(e))
                del trames, daycorr, ndaycorr

            logging.debug("Updating Job")
            update_job(db, goal_day, orig_pair, 'SC', 'D')

            logging.info(
                "Finished processing this station. It took %.2f seconds" %
                (time.time() - tt))
        logging.info("Job Finished. It took %.2f seconds" %
                     (time.time() - jt))
# Convert the USPS .mat file into a whitened [data, labels] pickle.
mat = scipy.io.loadmat('../data/usps_all.mat')
dataMat = mat['data']
# dataMat is indexed as [feature, sample, class].
numNumber = len(dataMat[0,0,:])
sampleNumber = len(dataMat[0,:,0])
sampleDim = len(dataMat[:,0,0])
processedData = numpy.zeros((numNumber*sampleNumber,sampleDim),dtype=float32)
processedLabel = numpy.zeros((numNumber*sampleNumber),dtype=int64)
for i in xrange(numNumber):
    for j in xrange(sampleNumber):
        # Transpose each 16x16 image before flattening it back to 256 values.
        a0 = dataMat[:,j,i]
        a0 = a0.reshape([16,16])
        a0 = a0.transpose()
        a0 = a0.reshape(256)
        processedData[i*sampleNumber+j,:] = a0
    # Class index 9 is labelled 0; every other class index i is labelled i+1.
    if i == 9:
        processedLabel[i*sampleNumber:(i+1)*sampleNumber] = numpy.zeros(sampleNumber)
    else:
        processedLabel[i*sampleNumber:(i+1)*sampleNumber] = (i+1)*numpy.ones(sampleNumber)
processedData = whiten(processedData)
dataset = [processedData,processedLabel]
# Open the output only once the data is ready, so a failure above does not
# truncate an existing file, and always close the handle.
fw = gzip.open('../data/usps_new_whiten.pkl.gz', 'wb')
try:
    cPickle.dump(dataset,fw)
finally:
    fw.close()
centroids[idx] = summation[idx] / _counts[idx] centroids[nidx] = 0. return centroids ########################################### x_train = np.reshape(x_train, (TRAIN_EXAMPLES, H, W, C)) mean = np.mean(x_train, axis=(0, 1, 2), keepdims=True) std = np.std(x_train, axis=(0, 1, 2), ddof=1, keepdims=True) scale = std + 1. x_train = x_train - mean x_train = x_train / scale x_train = whiten(X=x_train, method='zca') x_train = np.reshape(x_train, (TRAIN_EXAMPLES, H, W, C)) ########################################### ''' filters = np.zeros(shape=(96, 5, 5, 3)) for ii in range(6): patches = get_patches(X=x_train, patch_shape=(5, 5, 3), patch_num=400000) patches = np.reshape(patches, (400000, 5*5*3)) centroids = kmeans(patches=patches, patch_shape=(5, 5, 3), patch_num=400000, centroid_num=16, iterations=25) filters[ii*16:(ii+1)*16] = np.reshape(centroids, (16, 5, 5, 3)) ''' patches = get_patches(X=x_train, patch_shape=(5, 5, 3), patch_num=400000) patches = np.reshape(patches, (400000, 5 * 5 * 3)) centroids = kmeans(patches=patches,
trames2hWb = np.zeros((2, int(Nfft)), dtype=np.complex) for i, station in enumerate(pair): # print "USING rms threshold = %f" % rms_threshold # logging.debug("rmsmat[i] = %f" % rmsmat[i]) if rmsmat[i] > rms_threshold: if windsorizing != 0: indexes = np.where( np.abs(trame2h[i]) > (windsorizing * rmsmat[i]))[0] # clipping at windsorizing*rms trame2h[i][indexes] = (trame2h[i][indexes] / np.abs( trame2h[i][indexes])) * windsorizing * rmsmat[i] # logging.debug('whiten') trames2hWb[i] = whiten( trame2h[i], Nfft, dt, low, high, plot=False) else: # logging.debug("Station no %d, pas de pretraitement car rms < %f ou NaN"% (i, rms_threshold)) trames2hWb[i] = np.zeros(Nfft) corr = myCorr(trames2hWb, np.ceil(maxlag / dt), plot=False) thisdate = time.strftime( "%Y-%m-%d", time.gmtime(basetime + itranche * min30 / fe)) thistime = time.strftime( "%H_%M", time.gmtime(basetime + itranche * min30 / fe)) if keep_all: add_corr(db, station1.replace('.', '_'), station2.replace( '.', '_'), filterid, thisdate, thistime, min30 / fe, components, corr, fe) if keep_days:
low = float(filterdb.low) high = float(filterdb.high) rms_threshold = filterdb.rms_threshold # print "Filter Bounds used:", filterid, low, high trames2hWb= np.zeros(np.shape(trame2h)) for i, station in enumerate(pair): # print "USING rms threshold = %f" % rms_threshold # logging.debug("rmsmat[i] = %f" % rmsmat[i]) if rmsmat[i] > rms_threshold: if windsorizing != 0: indexes = np.where(np.abs(trame2h[i]) > ( windsorizing*rmsmat[i] ) )[0] #clipping at windsorizing*rms trame2h[i][indexes] = (trame2h[i][indexes]/np.abs(trame2h[i][indexes])) * windsorizing * rmsmat[i] # logging.debug('whiten') trames2hWb[i] = whiten(trame2h[i],min30, dt, low, high, plot=False) else: # logging.debug("Station no %d, pas de pretraitement car rms < %f ou NaN"% (i, rms_threshold)) trames2hWb[i] = trame2h[i] corr = myCorr(trames2hWb, np.ceil(maxlag/dt),plot=False) thisdate = time.strftime("%Y-%m-%d",time.gmtime(basetime+itranche*min30/fe)) thistime = time.strftime("%H_%M",time.gmtime(basetime+itranche*min30/fe)) if keep_all: add_corr(db, station1.replace('.','_'), station2.replace('.','_'),filterid, thisdate, thistime, min30/fe, components, corr, fe) if keep_days: if not np.any(np.isnan(corr)) and not np.any(np.isinf(corr)): daycorr[filterid] += corr ndaycorr[filterid] += 1
########
# Accumulate x^T y over random batches, where x is the standardized and
# whitened first-layer output and y = x @ weights1.
xy = 0.
batch_size = 100
for idx in range(0, 100000, batch_size):
    start = idx
    end = idx + batch_size
    x = np.random.uniform(low=-1., high=1., size=(batch_size, LAYER1))
    x = x @ weights0
    mean = np.mean(x, axis=0, keepdims=True)
    std = np.std(x, axis=0, ddof=1, keepdims=True)
    x = (x - mean) / std
    x = whiten(x)
    y = x @ weights1
    xy += x.T @ y
########
# Radians-to-degrees factor uses np.pi rather than the truncated 3.14.
# NOTE(review): assumes angle_between returns radians - confirm.
angle1 = angle_between(np.reshape(xy, -1), np.reshape(weights0.T @ weights0 @ weights1, -1)) * (180.0 / np.pi)
angle2 = angle_between(np.reshape(xy, -1), np.reshape(weights1, -1)) * (180.0 / np.pi)
print(angle1, angle2)
########
#test_set = cPickle.load(fr) #label = test_set[1] train_set, valid_set, test_set = cPickle.load(fr) lengthx = len(train_set[0][:,0]) trainData = numpy.zeros((lengthx,resizeSize**2),dtype=float32) for i in xrange(len(train_set[0][:,1])): a0 = train_set[0][i,:] a0 = a0.reshape(28, 28) a0 = scipy.misc.imresize(a0,[16,16]) a0 = a0.reshape(256) trainData[i,:] = a0 trainData = whiten(trainData) resizeTrainSet = [trainData,train_set[1]] lengthx = len(test_set[0][:,0]) testData = numpy.zeros((lengthx,resizeSize**2),dtype=float32) for i in xrange(len(test_set[0][:,1])): a0 = test_set[0][i,:] a0 = a0.reshape(28, 28) a0 = scipy.misc.imresize(a0,[16,16]) a0 = a0.reshape(256) testData[i,:] = a0 testData = whiten(testData) resizeTestSet = [testData,test_set[1]]
x1 = x x2 = x + x_step y1 = y y2 = y + y_step z1 = z z2 = z + z_step if (x2 > 16 or y2 > 16 or z2 > 96): continue print(x, y, z) white = whiten(X=x_train[:, x1:x2, y1:y2, z1:z2], method='zca') white = np.reshape(white, (TRAIN_EXAMPLES, x2 - x1, y2 - y1, z2 - z1)) x_train[:, x1:x2, y1:y2, z1:z2] = white ########################################### ''' filters = np.zeros(shape=(128, 5, 5, 96)) for ii in range(8): centroids = kmeans(X=x_train, patch_shape=(5, 5, 96), patch_num=400000, centroid_num=16, iterations=25) start = ii * 16 end = (ii + 1) * 16 filters[start:end] = np.reshape(centroids, (16, 5, 5, 96)) ''' centroids = kmeans(X=x_train, patch_shape=(5, 5, 96),
def _slice_bounds(goal_duration, sampling_rate, corr_samples, overlap):
    """Return ``(begins, ends)`` sample indices of the correlation windows.

    :param goal_duration: length of the analysed record, in seconds.
    :param sampling_rate: sampling rate of the traces, in Hz.
    :param corr_samples: length of one correlation window, in samples.
    :param overlap: fraction of a window shared with the next one.
    :raises ValueError: if the step between two windows truncates to zero
        (or less) seconds, which would otherwise loop forever.
    """
    corr_seconds = corr_samples / sampling_rate
    last_start = goal_duration - corr_seconds
    # The step is truncated to whole seconds, as it always was.
    step = int(corr_seconds * (1.0 - overlap))
    if step <= 0 and last_start >= 0:
        raise ValueError(
            "corr_duration=%s s with overlap=%s gives a window step of %i s; "
            "the step must be at least 1 s" % (corr_seconds, overlap, step)
        )

    begins = []
    ends = []
    start = 0
    while start <= last_start:
        begins.append(int(start * sampling_rate))
        ends.append(int(start * sampling_rate + corr_samples))
        start += step
    return begins, ends


def _fft_length(corr_samples):
    """Return the FFT length (an int) used for one correlation window."""
    if corr_samples / 2 % 2 != 0:
        return int(corr_samples + 2)
    return int(corr_samples)


def _clip_trace(trace, windsorizing, rms):
    """Return *trace* clipped according to *windsorizing*, never in place.

    ``-1`` keeps only the sign of each sample, ``0`` disables clipping (the
    input is returned untouched), any other value clips the amplitudes at
    ``windsorizing * rms``.
    """
    if windsorizing == -1:
        return np.sign(trace)
    if windsorizing != 0:
        limit = windsorizing * rms
        return np.where(np.abs(trace) > limit, np.sign(trace) * limit, trace)
    return trace


def _rotate_component(component, index, azimuth_rad, trace_z, trace_e, trace_n):
    """Return the trace of station *index* for component "Z", "R" or "T".

    R and T are built from the N and E traces using *azimuth_rad*; when the
    azimuth is 0 the E trace is used as R and the N trace as T.
    """
    if component == "Z":
        return trace_z[index]
    if component == "R":
        if azimuth_rad != 0:
            return trace_n[index] * np.cos(azimuth_rad) + trace_e[index] * np.sin(azimuth_rad)
        return trace_e[index]
    if component == "T":
        if azimuth_rad != 0:
            return trace_n[index] * np.sin(azimuth_rad) - trace_e[index] * np.cos(azimuth_rad)
        return trace_n[index]
    raise ValueError("Unknown component %r" % (component,))


def _pair_azimuth(db, station1, station2):
    """Return the azimuth (radians) between two "NET.STA" stations.

    Returns 0.0 when the first station has no X coordinate.
    """
    netsta1 = station1.split(".")
    netsta2 = station2.split(".")
    s1 = get_station(db, netsta1[0], netsta1[1])
    s2 = get_station(db, netsta2[0], netsta2[1])
    if not s1.X:
        return 0.0

    # Stations expressed in different coordinate systems are flagged "MIX".
    coordinates = s1.coordinates if s1.coordinates == s2.coordinates else "MIX"
    azimuth_rad = np.deg2rad(azimuth(coordinates, s1.X, s1.Y, s2.X, s2.Y))
    logging.debug("Azimuth=%.1f", np.rad2deg(azimuth_rad))
    return azimuth_rad


def _prewhiten_zz(params, filters, stations, tramef_Z, begins, ends, dt):
    """Whiten every (station, filter, window) of the Z traces once.

    Returns a complex array of shape
    ``(len(stations), len(filters), len(begins), nfft)``.
    """
    nfft = _fft_length(params.min30)
    taper = cosTaper(int(params.min30), 0.04)

    logging.info("Pre-Whitening Traces")
    # `complex` replaces the np.complex alias, which NumPy >= 1.24 removed.
    whitened_slices = np.zeros((len(stations), len(filters), len(begins), nfft), dtype=complex)
    for istation in range(len(stations)):
        for islice, (begin, end) in enumerate(zip(begins, ends)):
            raw = tramef_Z[istation, begin:end]
            rms = np.std(np.abs(raw))
            # `raw` is a view of tramef_Z: clip and taper into a NEW array so
            # overlapping windows still read the untouched samples.
            trace = _clip_trace(raw, params.windsorizing, rms) * taper
            for ifilter, filterdb in enumerate(filters):
                whitened_slices[istation, ifilter, islice, :] = whiten(
                    trace, nfft, dt, float(filterdb.low), float(filterdb.high), plot=False
                )
    return whitened_slices


def _correlate_zz(db, params, filters, stations, pairs, goal_day, basetime, whitened_slices, begins, dt):
    """Correlate the pre-whitened Z spectra of every pair, filter by filter."""
    logging.info("Processing CC")
    for ifilter, filterdb in enumerate(filters):
        for pair in pairs:
            station1, station2 = pair.split(":")
            idx1 = np.where(stations == station1)[0][0]
            idx2 = np.where(stations == station2)[0][0]

            allcorr = {}
            if params.keep_days:
                daycorr = np.zeros(get_maxlag_samples(db))
                ndaycorr = 0

            for islice, begin in enumerate(begins):
                spectra = np.vstack(
                    (whitened_slices[idx1, ifilter, islice], whitened_slices[idx2, ifilter, islice])
                )
                corr = myCorr(spectra, np.ceil(params.maxlag / dt), plot=False)
                # Windows producing NaN/Inf are neither exported nor stacked.
                if not np.all(np.isfinite(corr)):
                    continue
                if params.keep_all:
                    tmptime = time.gmtime(basetime + begin / params.goal_sampling_rate)
                    thisdate = time.strftime("%Y-%m-%d", tmptime)
                    thistime = time.strftime("%Y-%m-%d %H:%M:%S", tmptime)
                    ccfid = "%s_%s_%s_%s_%s" % (station1, station2, filterdb.ref, "ZZ", thisdate)
                    allcorr.setdefault(ccfid, {})[thistime] = corr
                if params.keep_days:
                    daycorr += corr
                    ndaycorr += 1

            # allcorr stays empty unless keep_all is set.
            for ccfid, ccfs in allcorr.items():
                export_allcorr(db, ccfid, ccfs)

            if params.keep_days:
                thisdate = time.strftime("%Y-%m-%d", time.gmtime(basetime))
                thistime = time.strftime("%H_%M", time.gmtime(basetime))
                add_corr(
                    db,
                    station1.replace(".", "_"),
                    station2.replace(".", "_"),
                    filterdb.ref,
                    thisdate,
                    thistime,
                    params.min30 / params.goal_sampling_rate,
                    "ZZ",
                    daycorr,
                    params.goal_sampling_rate,
                    day=True,
                    ncorr=ndaycorr,
                )
            # NOTE(review): "D" presumably marks the job as done - confirm
            # against update_job().
            update_job(db, goal_day, pair, "CC", "D")


def _correlate_components(
    db, params, filters, stations, pairs, goal_day, basetime, tramef_Z, tramef_E, tramef_N, begins, ends, dt
):
    """Correlate every requested component combination, pair by pair."""
    nfft = _fft_length(params.min30)

    for pair in pairs:
        logging.info("Processing pair: %s", pair.replace(":", " vs "))
        tt = time.time()
        station1, station2 = pair.split(":")
        idx1 = np.where(stations == station1)[0][0]
        idx2 = np.where(stations == station2)[0][0]
        cplAz = _pair_azimuth(db, station1, station2)

        for components in params.components_to_compute:
            # Emptiness probe: the Z trace for a "Z" side, the E trace
            # otherwise (the E trace stands in for the horizontals).
            probe1 = tramef_Z if components[0] == "Z" else tramef_E
            probe2 = tramef_Z if components[1] == "Z" else tramef_E
            if np.all(probe1[idx1] == 0) or np.all(probe2[idx2] == 0):
                logging.debug("%s contains empty trace(s), skipping", components)
                continue

            trames = np.vstack(
                (
                    _rotate_component(components[0], idx1, cplAz, tramef_Z, tramef_E, tramef_N),
                    _rotate_component(components[1], idx2, cplAz, tramef_Z, tramef_E, tramef_N),
                )
            )

            daycorr = {}
            ndaycorr = {}
            allcorr = {}
            for filterdb in filters:
                daycorr[filterdb.ref] = np.zeros(get_maxlag_samples(db))
                ndaycorr[filterdb.ref] = 0

            for begin, end in zip(begins, ends):
                trame2h = trames[:, begin:end]
                rmsmat = np.std(np.abs(trame2h), axis=1)
                for filterdb in filters:
                    filterid = filterdb.ref
                    low = float(filterdb.low)
                    high = float(filterdb.high)
                    rms_threshold = filterdb.rms_threshold

                    # `complex` replaces the removed np.complex alias.
                    trames2hWb = np.zeros((2, nfft), dtype=complex)
                    skip = False
                    for i in range(2):
                        if rmsmat[i] > rms_threshold:
                            # Demean and clip into NEW arrays: `trame2h` is a
                            # view of `trames`, and every filter (and every
                            # overlapping window) must start from the same
                            # unmodified samples.
                            trace = trame2h[i] - trame2h[i].mean()
                            trace = _clip_trace(trace, params.windsorizing, rmsmat[i])
                            cp = cosTaper(len(trace), 0.04)
                            trames2hWb[i] = whiten(trace * cp, nfft, dt, low, high, plot=False)
                        else:
                            # Row i is left at zero and the window is dropped.
                            skip = True
                            logging.debug("Slice is Zeros!")
                    if skip:
                        continue

                    corr = myCorr(trames2hWb, np.ceil(params.maxlag / dt), plot=False)
                    if params.keep_all:
                        tmptime = time.gmtime(basetime + begin / params.goal_sampling_rate)
                        thisdate = time.strftime("%Y-%m-%d", tmptime)
                        thistime = time.strftime("%Y-%m-%d %H:%M:%S", tmptime)
                        ccfid = "%s_%s_%s_%s_%s" % (station1, station2, filterid, components, thisdate)
                        allcorr.setdefault(ccfid, {})[thistime] = corr
                    if params.keep_days and np.all(np.isfinite(corr)):
                        daycorr[filterid] += corr
                        ndaycorr[filterid] += 1

            if params.keep_all:
                for ccfid, ccfs in allcorr.items():
                    export_allcorr(db, ccfid, ccfs)

            if params.keep_days:
                try:
                    for filterdb in filters:
                        filterid = filterdb.ref
                        ncorr = ndaycorr[filterid]
                        if ncorr > 0:
                            logging.debug(
                                "Saving daily CCF for filter %02i, comp %s (stack of %02i CCF)",
                                filterid,
                                components,
                                ncorr,
                            )
                            thisdate = time.strftime("%Y-%m-%d", time.gmtime(basetime))
                            thistime = time.strftime("%H_%M", time.gmtime(basetime))
                            add_corr(
                                db,
                                station1.replace(".", "_"),
                                station2.replace(".", "_"),
                                filterid,
                                thisdate,
                                thistime,
                                params.min30 / params.goal_sampling_rate,
                                components,
                                daycorr[filterid],
                                params.goal_sampling_rate,
                                day=True,
                                ncorr=ncorr,
                            )
                except Exception:
                    # Still best-effort (processing continues), but the
                    # failure is now visible with its traceback instead of
                    # being hidden at DEBUG level.
                    logging.exception("Could not save daily CCF for %s (%s)", pair, components)

        logging.debug("Updating Job")
        # NOTE(review): "D" presumably marks the job as done - confirm
        # against update_job().
        update_job(db, goal_day, pair, "CC", "D")
        logging.info("Finished processing this pair. It took %.2f seconds", time.time() - tt)


def main():
    """Run the "Compute CC" step until no CC job is left.

    Reads the configuration from the database, then, while ``is_next_job``
    reports a pending "CC" job: fetches the job batch, preprocesses the
    traces of all stations involved, cuts them into (possibly overlapping)
    windows, whitens and cross-correlates them for every active filter, and
    hands the results to ``export_allcorr`` / ``add_corr`` depending on the
    ``keep_all`` / ``keep_days`` settings. Each pair is finally passed to
    ``update_job``.

    When "ZZ" is the only component requested, every station window is
    whitened once and reused for all pairs; otherwise each pair and component
    combination is processed separately.

    Exits the interpreter via ``sys.exit()`` if no filter is defined.

    :raises ValueError: if the configured window length and overlap give a
        window step of zero seconds.
    """
    logging.basicConfig(
        level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
    )
    logging.info("*** Starting: Compute CC ***")

    # Connection to the DB
    db = connect()

    if len(get_filters(db, all=False)) == 0:
        logging.info("NO FILTERS DEFINED, exiting")
        sys.exit()

    # Get Configuration
    params = Params()
    params.goal_sampling_rate = float(get_config(db, "cc_sampling_rate"))
    params.goal_duration = float(get_config(db, "analysis_duration"))
    params.overlap = float(get_config(db, "overlap"))
    params.maxlag = float(get_config(db, "maxlag"))
    # Despite its name, min30 is the correlation window length in samples.
    params.min30 = float(get_config(db, "corr_duration")) * params.goal_sampling_rate
    params.windsorizing = float(get_config(db, "windsorizing"))
    params.resampling_method = get_config(db, "resampling_method")
    params.decimation_factor = int(get_config(db, "decimation_factor"))
    params.preprocess_lowpass = float(get_config(db, "preprocess_lowpass"))
    params.preprocess_highpass = float(get_config(db, "preprocess_highpass"))
    params.keep_all = get_config(db, "keep_all", isbool=True)
    params.keep_days = get_config(db, "keep_days", isbool=True)
    params.components_to_compute = get_components_to_compute(db)

    logging.info("Will compute %s", " ".join(params.components_to_compute))

    all_components = "".join(params.components_to_compute)
    needs_horizontals = "R" in all_components or "T" in all_components
    zz_only = len(params.components_to_compute) == 1 and params.components_to_compute[0] == "ZZ"
    dt = 1.0 / params.goal_sampling_rate

    while is_next_job(db, jobtype="CC"):
        jobs = get_next_job(db, jobtype="CC")
        # One filter query per job instead of one per station/window/pair.
        filters = get_filters(db, all=False)

        stations = []
        pairs = []
        for job in jobs:
            pairs.append(job.pair)
            netsta1, netsta2 = job.pair.split(":")
            stations.append(netsta1)
            stations.append(netsta2)
            goal_day = job.day
        stations = np.unique(stations)

        logging.info("New CC Job: %s (%i pairs with %i stations)", goal_day, len(pairs), len(stations))
        jt = time.time()

        xlen = int(params.goal_duration * params.goal_sampling_rate)
        tramef_Z = np.zeros((len(stations), xlen))
        if needs_horizontals:
            comps = ["Z", "E", "N"]
            tramef_E = np.zeros((len(stations), xlen))
            tramef_N = np.zeros((len(stations), xlen))
            basetime, tramef_Z, tramef_E, tramef_N = preprocess(
                db, stations, comps, goal_day, params, tramef_Z, tramef_E, tramef_N
            )
        else:
            comps = ["Z"]
            tramef_E = tramef_N = None
            basetime, tramef_Z = preprocess(db, stations, comps, goal_day, params, tramef_Z)

        begins, ends = _slice_bounds(
            params.goal_duration, params.goal_sampling_rate, params.min30, params.overlap
        )

        if zz_only:
            # Only ZZ: whiten each station window once, reuse it for all pairs.
            whitened_slices = _prewhiten_zz(params, filters, stations, tramef_Z, begins, ends, dt)
            del tramef_Z  # the raw traces are no longer needed
            _correlate_zz(
                db, params, filters, stations, pairs, goal_day, basetime, whitened_slices, begins, dt
            )
        else:
            _correlate_components(
                db,
                params,
                filters,
                stations,
                pairs,
                goal_day,
                basetime,
                tramef_Z,
                tramef_E,
                tramef_N,
                begins,
                ends,
                dt,
            )
        logging.info("Job Finished. It took %.2f seconds", time.time() - jt)

    logging.info("*** Finished: Compute CC ***")
# logging.debug("rmsmat[i] = %f" % rmsmat[i]) if rmsmat[i] > rms_threshold: if windsorizing != 0: indexes = np.where( np.abs(trame2h[i]) > (windsorizing * rmsmat[i]))[0] #clipping at windsorizing*rms trame2h[i][indexes] = ( trame2h[i][indexes] / np.abs(trame2h[i][indexes]) ) * windsorizing * rmsmat[i] # logging.debug('whiten') trames2hWb[i] = whiten(trame2h[i], min30, dt, low, high, plot=False) else: # logging.debug("Station no %d, pas de pretraitement car rms < %f ou NaN"% (i, rms_threshold)) trames2hWb[i] = trame2h[i] corr = myCorr(trames2hWb, np.ceil(maxlag / dt), plot=False) thisdate = time.strftime( "%Y-%m-%d", time.gmtime(basetime + itranche * min30 / fe)) thistime = time.strftime( "%H_%M", time.gmtime(basetime + itranche * min30 / fe)) if keep_all: add_corr(db, station1.replace('.', '_'), station2.replace('.',