# numpy function aliases used throughout the snippets below
from numpy import (arange as nparange, array as nparray,
                   digitize as npdigitize, median as npmedian,
                   sum as npsum, unique as npunique,
                   var as npvar, where as npwhere)


def __init__(self, headername, liststations, filename, outputdir):
    self.filename = filename
    self.outputdir = outputdir

    aa = read_ukmo(headername, liststations, filename)

    # unique station IDs
    ids = npunique(aa.csvdata['ID'])

    for identifier in ids:

        # skip stations with missing metadata
        try:
            lon = aa.stationdata[identifier.strip()]['longitude']
            lat = aa.stationdata[identifier.strip()]['latitude']
            elevation = aa.stationdata[identifier.strip()]['elevation']
        except KeyError:
            continue

        # skip stations flagged with the -99999 missing-value sentinel
        if (lon == -99999) or (lat == -99999) or (elevation == -99999):
            continue

        # list of indices of all records for this station
        idx = npwhere(aa.csvdata['ID'] == identifier)[0]

        # extract all keys for the selected station identifier
        dataout = dict(
            (k, nparray(aa.csvdata[k])[idx]) for k in aa.csvdata.keys()
        )
        stationid = dataout['ID'][0]

        # remove the station metadata from the per-record dictionary
        dataout.pop('longitude', None)
        dataout.pop('latitude', None)
        dataout.pop('elevation', None)
        dataout.pop('ID', None)

        # create the netCDF file for this station
        filename = self.define_output_file(stationid)
        self.write_netcdf(filename, dataout, lon, lat, elevation)
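# The define_output_file and write_netcdf methods are defined elsewhere in
# this class. For context, a minimal sketch of what the output-file helper
# might look like; the method body and the 'ukmo_<ID>.nc' naming scheme are
# assumptions for illustration, not the class's actual convention.
import os

def define_output_file(self, stationid):
    # hypothetical: one netCDF file per station under the output directory
    return os.path.join(self.outputdir, 'ukmo_%s.nc' % str(stationid).strip())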
def stellingwerf_pdm_theta(times, mags, errs, frequency,
                           binsize=0.05, minbin=9):
    '''
    This calculates the Stellingwerf PDM theta value at a test frequency.
    '''
    period = 1.0/frequency
    fold_time = times[0]

    # phase the mag series at the test period
    phased = phase_magseries(times,
                             mags,
                             period,
                             fold_time,
                             wrap=False,
                             sort=True)
    phases = phased['phase']
    pmags = phased['mags']

    bins = nparange(0.0, 1.0, binsize)
    binnedphaseinds = npdigitize(phases, bins)

    binvariances = []
    binndets = []
    goodbins = 0

    # collect the variance and number of detections in each good phase bin
    for x in npunique(binnedphaseinds):

        thisbin_inds = binnedphaseinds == x
        thisbin_mags = pmags[thisbin_inds]

        if thisbin_mags.size > minbin:
            thisbin_variance = npvar(thisbin_mags, ddof=1)
            binvariances.append(thisbin_variance)
            binndets.append(thisbin_mags.size)
            goodbins = goodbins + 1

    # now calculate theta
    binvariances = nparray(binvariances)
    binndets = nparray(binndets)

    theta_top = (npsum(binvariances*(binndets - 1)) /
                 (npsum(binndets) - goodbins))
    theta_bot = npvar(pmags, ddof=1)
    theta = theta_top/theta_bot

    return theta
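# The phase-binned statistics in this file all call phase_magseries, which
# is defined elsewhere in the package. Below is a minimal stand-in sketch,
# assuming the usual fold-at-epoch convention and the {'phase', 'mags'}
# return keys the functions above and below index into; the real
# implementation will differ in details (error propagation, wrapping).
import numpy as np

def phase_magseries_sketch(times, mags, period, epoch, wrap=False, sort=True):
    '''Minimal stand-in for phase_magseries: folds times at period/epoch.'''
    # fractional phase of each observation relative to the epoch
    phase = ((times - epoch)/period) % 1.0

    if sort:
        # order by phase so downstream binning sees a monotonic phase axis
        order = np.argsort(phase)
        phase, mags = phase[order], mags[order]

    if wrap:
        # repeat the cycle over [-1, 0) so plots show a full wrapped period
        phase = np.concatenate((phase - 1.0, phase))
        mags = np.concatenate((mags, mags))

    return {'phase': phase, 'mags': mags}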
def pwd_phasebin(phases, mags, binsize=0.002, minbin=9):
    '''
    This bins the phased mag series using the given binsize.
    '''
    bins = nparange(0.0, 1.0, binsize)
    binnedphaseinds = npdigitize(phases, bins)

    binnedphases, binnedmags = [], []

    for x in npunique(binnedphaseinds):

        thisbin_inds = binnedphaseinds == x
        thisbin_phases = phases[thisbin_inds]
        thisbin_mags = mags[thisbin_inds]

        # check the number of items in this bin, not the size of the boolean
        # mask (thisbin_inds.size is always the length of the full series)
        if thisbin_mags.size > minbin:
            binnedphases.append(npmedian(thisbin_phases))
            binnedmags.append(npmedian(thisbin_mags))

    return nparray(binnedphases), nparray(binnedmags)
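# Quick usage sketch for pwd_phasebin with a synthetic phased sinusoid
# (array names and values are illustrative only).
import numpy as np

rng = np.random.default_rng(42)
phases = np.sort(rng.uniform(0.0, 1.0, 5000))
mags = 12.0 + 0.1*np.sin(2.0*np.pi*phases) + rng.normal(0.0, 0.01, 5000)

binphases, binmags = pwd_phasebin(phases, mags, binsize=0.01)
print(binphases.size, 'bins kept')  # only bins with > minbin points survive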
def aov_theta(times, mags, errs, frequency,
              binsize=0.05, minbin=9):
    '''Calculates the Schwarzenberg-Czerny AoV statistic at a test frequency.

    Parameters
    ----------

    times,mags,errs : np.array
        The input time-series and associated errors.

    frequency : float
        The test frequency to calculate the theta statistic at.

    binsize : float
        The phase bin size to use.

    minbin : int
        The minimum number of items in a phase bin to consider in the
        calculation of the statistic.

    Returns
    -------

    theta_aov : float
        The value of the AoV statistic at the specified `frequency`.

    '''

    period = 1.0/frequency
    fold_time = times[0]

    phased = phase_magseries(times,
                             mags,
                             period,
                             fold_time,
                             wrap=False,
                             sort=True)

    phases = phased['phase']
    pmags = phased['mags']
    bins = nparange(0.0, 1.0, binsize)
    ndets = phases.size

    binnedphaseinds = npdigitize(phases, bins)

    bin_s1_tops = []
    bin_s2_tops = []
    binndets = []
    goodbins = 0

    all_xbar = npmedian(pmags)

    for x in npunique(binnedphaseinds):

        thisbin_inds = binnedphaseinds == x
        thisbin_mags = pmags[thisbin_inds]

        if thisbin_mags.size > minbin:

            thisbin_ndet = thisbin_mags.size
            thisbin_xbar = npmedian(thisbin_mags)

            # get s1
            thisbin_s1_top = (
                thisbin_ndet *
                (thisbin_xbar - all_xbar) *
                (thisbin_xbar - all_xbar)
            )

            # get s2
            thisbin_s2_top = npsum((thisbin_mags - all_xbar) *
                                   (thisbin_mags - all_xbar))

            bin_s1_tops.append(thisbin_s1_top)
            bin_s2_tops.append(thisbin_s2_top)
            binndets.append(thisbin_ndet)
            goodbins = goodbins + 1

    # turn the quantities into arrays
    bin_s1_tops = nparray(bin_s1_tops)
    bin_s2_tops = nparray(bin_s2_tops)
    binndets = nparray(binndets)

    # calculate s1 first
    s1 = npsum(bin_s1_tops)/(goodbins - 1.0)

    # then calculate s2
    s2 = npsum(bin_s2_tops)/(ndets - goodbins)

    theta_aov = s1/s2

    return theta_aov
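# Note: with r = goodbins, n = ndets, n_i and xbar_i the per-bin counts and
# medians, and xbar the global median, the implementation above computes
#
#     s1 = sum_i n_i*(xbar_i - xbar)^2 / (r - 1)
#     s2 = sum_i sum_{j in bin i} (x_ij - xbar)^2 / (n - r)
#     theta_aov = s1/s2
#
# i.e. the ratio of between-bin scatter to residual scatter. As coded, the
# global median xbar (not the per-bin median) appears inside s2.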
def stellingwerf_pdm_theta(times, mags, errs, frequency,
                           binsize=0.05, minbin=9):
    '''
    This calculates the Stellingwerf PDM theta value at a test frequency.

    Parameters
    ----------

    times,mags,errs : np.array
        The input time-series and associated errors.

    frequency : float
        The test frequency to calculate the theta statistic at.

    binsize : float
        The phase bin size to use.

    minbin : int
        The minimum number of items in a phase bin to consider in the
        calculation of the statistic.

    Returns
    -------

    theta_pdm : float
        The value of the theta statistic at the specified `frequency`.

    '''

    period = 1.0/frequency
    fold_time = times[0]

    phased = phase_magseries(times,
                             mags,
                             period,
                             fold_time,
                             wrap=False,
                             sort=True)

    phases = phased['phase']
    pmags = phased['mags']
    bins = nparange(0.0, 1.0, binsize)

    binnedphaseinds = npdigitize(phases, bins)

    binvariances = []
    binndets = []
    goodbins = 0

    for x in npunique(binnedphaseinds):

        thisbin_inds = binnedphaseinds == x
        thisbin_mags = pmags[thisbin_inds]

        if thisbin_mags.size > minbin:
            thisbin_variance = npvar(thisbin_mags, ddof=1)
            binvariances.append(thisbin_variance)
            binndets.append(thisbin_mags.size)
            goodbins = goodbins + 1

    # now calculate theta
    binvariances = nparray(binvariances)
    binndets = nparray(binndets)

    theta_top = (npsum(binvariances*(binndets - 1)) /
                 (npsum(binndets) - goodbins))
    theta_bot = npvar(pmags, ddof=1)
    theta = theta_top/theta_bot

    return theta
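# Usage sketch: PDM theta is near 1 for a bad fold and drops toward 0 at the
# true frequency, so a period search minimizes it over a frequency grid.
# Synthetic data and illustrative names; assumes phase_magseries is
# available (e.g. the sketch above).
import numpy as np

rng = np.random.default_rng(7)
times = np.sort(rng.uniform(0.0, 100.0, 2000))
mags = 12.0 + 0.1*np.sin(2.0*np.pi*times/2.5) + rng.normal(0.0, 0.01, 2000)
errs = np.full_like(mags, 0.01)

freqs = np.linspace(0.05, 2.0, 4000)
thetas = np.array([stellingwerf_pdm_theta(times, mags, errs, f)
                   for f in freqs])
best_freq = freqs[np.argmin(thetas)]  # PDM: minimum theta wins
print('best period: %.4f d' % (1.0/best_freq))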
def aov_theta(times, mags, errs, frequency,
              binsize=0.05, minbin=9):
    '''Calculates the Schwarzenberg-Czerny AoV statistic at a test frequency.

    '''

    period = 1.0/frequency
    fold_time = times[0]

    phased = phase_magseries(times,
                             mags,
                             period,
                             fold_time,
                             wrap=False,
                             sort=True)

    phases = phased['phase']
    pmags = phased['mags']
    bins = nparange(0.0, 1.0, binsize)
    ndets = phases.size

    binnedphaseinds = npdigitize(phases, bins)

    bin_s1_tops = []
    bin_s2_tops = []
    binndets = []
    goodbins = 0

    all_xbar = npmedian(pmags)

    for x in npunique(binnedphaseinds):

        thisbin_inds = binnedphaseinds == x
        thisbin_mags = pmags[thisbin_inds]

        if thisbin_mags.size > minbin:

            thisbin_ndet = thisbin_mags.size
            thisbin_xbar = npmedian(thisbin_mags)

            # get s1
            thisbin_s1_top = (thisbin_ndet *
                              (thisbin_xbar - all_xbar) *
                              (thisbin_xbar - all_xbar))

            # get s2
            thisbin_s2_top = npsum((thisbin_mags - all_xbar) *
                                   (thisbin_mags - all_xbar))

            bin_s1_tops.append(thisbin_s1_top)
            bin_s2_tops.append(thisbin_s2_top)
            binndets.append(thisbin_ndet)
            goodbins = goodbins + 1

    # turn the quantities into arrays
    bin_s1_tops = nparray(bin_s1_tops)
    bin_s2_tops = nparray(bin_s2_tops)
    binndets = nparray(binndets)

    # calculate s1 first
    s1 = npsum(bin_s1_tops)/(goodbins - 1.0)

    # then calculate s2
    s2 = npsum(bin_s2_tops)/(ndets - goodbins)

    theta_aov = s1/s2

    return theta_aov
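# Unlike PDM, the AoV statistic peaks at the true frequency, so a period
# search maximizes it (reusing times, mags, errs and freqs from the PDM
# example above).
import numpy as np

aov_vals = np.array([aov_theta(times, mags, errs, f) for f in freqs])
best_freq = freqs[np.argmax(aov_vals)]  # AoV: maximum theta wins
print('best period: %.4f d' % (1.0/best_freq))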
import time as tm
from multiprocessing import Pool, cpu_count

from numpy import unique as npunique

# AreaManager, ExtendedMemory and constructPossible are provided by the
# surrounding clusterpy package (its componentsAlg module)


def execArisel(y, w, pRegions, inits=3, initialSolution=[],
               convTabu=0, tabuLength=10):
    """Automatic Rationalization with Initial Seed Location

    ARiSeL, proposed by [Duque_Church2004]_, aggregates N areas into P
    spatially contiguous regions while minimizing intra-regional
    heterogeneity (measured as the within-cluster sum of squares from each
    area to the attribute centroid of its cluster). This algorithm is a
    modification of Openshaw's AZP-tabu [Openshaw_Rao1995]_. In ARiSeL the
    construction of an initial feasible solution is repeated several times
    (inits) before running the Tabu Search algorithm [Glover1977]_.

    Duque and Church argue that:

        - constructing an initial feasible solution is computationally less
          expensive than performing local search.

        - local search by moving bordering areas between regions does not
          allow an extensive search of the solution space and is
          computationally expensive.

    Based on those two ideas, the authors propose generating many different
    initial feasible solutions and running Tabu Search on the best one
    obtained. The initial solution follows a "growing regions" strategy: it
    starts with an initial set of seeds (as many seeds as regions) selected
    using the K-means++ algorithm, and from those seeds, neighbouring areas
    are assigned to the closest (in attribute space) growing region. This
    strategy has been shown to give better results. ::

        Layer.cluster('arisel', vars, regions, <wType>, <std>, <inits>,
                      <initialSolution>, <convTabu>, <tabuLength>,
                      <dissolve>, <dataOperations>)

    :keyword vars: Area attribute(s) (e.g. ['SAR1', 'SAR2'])
    :type vars: list
    :keyword regions: Number of regions
    :type regions: integer
    :keyword wType: Type of first-order contiguity-based spatial matrix:
           'rook' or 'queen'. Default value wType = 'rook'.
    :type wType: string
    :keyword std: If = 1, then the variables will be standardized.
    :type std: binary
    :keyword inits: Number of initial feasible solutions to be constructed
           before applying Tabu Search. Default value inits = 3.
    :type inits: integer
    :keyword initialSolution: List with an initial solution vector. It is
           useful when the user wants a solution that is not very different
           from a preexisting solution (e.g. municipalities, districts,
           etc.). Note that the number of regions will be the same as the
           number of regions in the initial feasible solution (regardless of
           the value you assign to the parameter "regions"). IMPORTANT: make
           sure you are entering a solution that is feasible according to
           the W matrix you selected, otherwise the algorithm will not
           converge.
    :type initialSolution: list
    :keyword convTabu: Stop the search after convTabu nonimproving moves
           (nonimproving moves are those that do not improve the current
           solution; note that "improving moves" are different from
           "aspirational moves"). If convTabu = 0, the algorithm will stop
           after int(N/P) nonimproving moves. Default value convTabu = 0.
    :type convTabu: integer
    :keyword tabuLength: Number of times a reverse move is prohibited.
           Default value *tabuLength = 10*.
    :type tabuLength: integer
    :keyword dissolve: If = 1, then you will get a "child" instance of the
           layer that contains the new regions. Default value
           *dissolve = 0*. **Note:** each child layer is saved in the
           attribute *layer.results*. The first algorithm that you run with
           *dissolve=1* will have a child layer in *layer.results[0]*; the
           second algorithm that you run with *dissolve=1* will be in
           *layer.results[1]*, and so on. You can export a child as a
           shapefile with
           *layer.results[<0,1,2..>].exportArcData('filename')*
    :type dissolve: binary
    :keyword dataOperations: Dictionary which maps a variable to a list of
           operations to run on it. The dissolved layer will contain in its
           data all the variables specified in this dictionary. Be sure to
           check the input layer's fieldNames before using this utility.
    :type dataOperations: dictionary

    The dictionary structure must be as shown below:

    >>> X = {}
    >>> X[variableName1] = [function1, function2, ...]
    >>> X[variableName2] = [function1, function2, ...]

    Where the functions are strings naming the operations to apply to the
    given variableName: 'sum', 'mean', 'min', 'max', 'meanDesv', 'stdDesv',
    'med', 'mode', 'range', 'first', 'last', 'numberOfAreas'. By default
    only the ID variable is added to the dissolved map.
    """
    lenY = len(y)
    start = 0.0
    time2 = 0.0

    print("Running original Arisel algorithm")
    print("Number of areas: ", lenY)

    if initialSolution:
        print("Number of regions: ", len(npunique(initialSolution)))
        pRegions = len(set(initialSolution))
    else:
        print("Number of regions: ", pRegions)

    if pRegions >= lenY:
        message = ("\n WARNING: You are aggregating " + str(lenY) +
                   " areas into " + str(pRegions) + " regions!! The number "
                   "of regions must be an integer lower than the number of "
                   "areas being aggregated")
        raise Exception(message)

    if convTabu <= 0:
        # stop after int(N/P) nonimproving moves by default
        convTabu = lenY // pRegions
        # convTabu = 230*numpy.sqrt(pRegions)

    distanceType = "EuclideanSquared"
    distanceStat = "Centroid"
    objectiveFunctionType = "SS"
    selectionType = "Minimum"

    am = AreaManager(w, y, distanceType)
    extendedMemory = ExtendedMemory()

    # construct the initial feasible solutions in parallel
    pool = Pool(processes=cpu_count())
    procs = []

    start = tm.time()
    for dummy in range(inits):
        ans = pool.apply_async(constructPossible,
                               [am, pRegions, initialSolution,
                                distanceType, distanceStat,
                                selectionType, objectiveFunctionType])
        procs.append(ans)

    results = [p.get() for p in procs]
    pool.close()
    pool.join()

    # keep the initial solution with the best (lowest) objective value
    tmp_ans = extendedMemory
    for rm in results:
        if rm.objInfo < tmp_ans.objInfo:
            tmp_ans = rm
    rm = tmp_ans

    extendedMemory.updateExtendedMemory(rm)
    rm.recoverFromExtendedMemory(extendedMemory)
    print("INITIAL SOLUTION: ", rm.returnRegions(),
          "\nINITIAL OF: ", rm.objInfo)

    # improve the best initial solution with Tabu Search
    rm.tabuMove(tabuLength=tabuLength, convTabu=convTabu)
    time2 = tm.time() - start

    Sol = rm.regions
    Of = rm.objInfo
    print("FINAL SOLUTION: ", Sol, "\nFINAL OF: ", Of)

    output = {"objectiveFunction": Of,
              "runningTime": time2,
              "algorithm": "arisel",
              "regions": len(Sol),
              "r2a": Sol,
              "distanceType": distanceType,
              "distanceStat": distanceStat,
              "selectionType": selectionType,
              "ObjectiveFuncionType": objectiveFunctionType}
    return output
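# Usage sketch through the documented Layer.cluster entry point. The
# shapefile path is a placeholder; this assumes clusterpy's standard
# importArcData loader and exportArcData writer.
import clusterpy

layer = clusterpy.importArcData("path/to/myShapefile")
layer.cluster('arisel', ['SAR1'], 5, wType='rook', inits=3, dissolve=1)

# per the docstring, the dissolved child layer lands in layer.results[0]
layer.results[0].exportArcData("arisel_regions")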