Beispiel #1
0
 def __init__(self, headername, liststations, filename, outputdir):
     """Read a UKMO data file and write one output file per station.

     The input is parsed with ``read_ukmo(headername, liststations,
     filename)``.  For every unique value in the parsed ``csvdata['ID']``
     column the matching rows of all columns are collected and passed to
     ``self.write_netcdf`` together with the station's longitude, latitude
     and elevation; the target path comes from
     ``self.define_output_file``.

     Stations are skipped when their (whitespace-stripped) identifier has
     no entry in ``stationdata`` or when any of longitude, latitude or
     elevation equals the sentinel ``-99999``.

     :param headername: forwarded unchanged to ``read_ukmo``
     :param liststations: forwarded unchanged to ``read_ukmo``
     :param filename: input file; stored on ``self.filename``
     :param outputdir: stored on ``self.outputdir``
     """
     self.filename = filename
     self.outputdir = outputdir
     ukmo = read_ukmo(headername, liststations, filename)
     csvdata = ukmo.csvdata
     for identifier in npunique(csvdata['ID']):
         # station metadata is keyed by the stripped identifier
         station_key = identifier.strip()
         try:
             station = ukmo.stationdata[station_key]
             lon = station['longitude']
             lat = station['latitude']
             elevation = station['elevation']
         except KeyError:
             continue
         # -99999 marks a missing coordinate; such stations are not written
         if any(value == -99999 for value in (lon, lat, elevation)):
             continue
         # row indices belonging to this station (unstripped identifier)
         rows = npwhere(csvdata['ID'] == identifier)[0]
         # slice every column down to the rows of this station
         dataout = {
             name: nparray(csvdata[name])[rows] for name in csvdata.keys()
         }
         stationid = dataout['ID'][0]
         # these are passed separately (or not at all), so drop them
         for unwanted in ('longitude', 'latitude', 'elevation', 'ID'):
             dataout.pop(unwanted, None)
         # create the output file for this station
         outfile = self.define_output_file(stationid)
         self.write_netcdf(outfile, dataout, lon, lat, elevation)
Beispiel #2
0
def stellingwerf_pdm_theta(times, mags, errs, frequency,
                           binsize=0.05, minbin=9):
    '''
    Compute the Stellingwerf PDM theta statistic at one test frequency.

    The series is folded at period ``1/frequency`` (epoch ``times[0]``)
    via `phase_magseries`, the folded magnitudes are grouped into phase
    bins of width `binsize`, and only bins holding more than `minbin`
    points are kept. With ``var_j`` and ``n_j`` the sample variance
    (``ddof=1``) and size of kept bin ``j``, and ``M`` the number of kept
    bins, the result is::

        theta = [sum_j var_j*(n_j - 1) / (sum_j n_j - M)] / var(all mags)

    `errs` is accepted for signature compatibility but not used here.

    '''

    period = 1.0/frequency
    fold_time = times[0]

    folded = phase_magseries(times, mags, period, fold_time,
                             wrap=False, sort=True)

    phase_vals = folded['phase']
    mag_vals = folded['mags']
    bin_edges = np.arange(0.0, 1.0, binsize)

    bin_labels = npdigitize(phase_vals, bin_edges)

    # magnitudes of every occupied bin, then only the well-populated ones
    occupied = [mag_vals[bin_labels == label]
                for label in npunique(bin_labels)]
    kept = [group for group in occupied if group.size > minbin]

    variances = nparray([npvar(group, ddof=1) for group in kept])
    counts = nparray([group.size for group in kept])
    n_kept = len(kept)

    # pooled within-bin variance over the overall variance
    pooled_variance = (
        npsum(variances*(counts - 1)) / (npsum(counts) - n_kept)
    )
    overall_variance = npvar(mag_vals, ddof=1)

    return pooled_variance/overall_variance
Beispiel #3
0
def pwd_phasebin(phases, mags, binsize=0.002, minbin=9):
    '''
    This bins the phased mag series using the given binsize.

    Phase bins have left edges ``np.arange(0.0, 1.0, binsize)``. For every
    occupied bin that contains more than `minbin` points, the median phase
    and the median magnitude of that bin are emitted.

    Parameters
    ----------

    phases,mags : array-like
        The phases and the matching magnitudes. They are converted with
        `np.asarray`, so plain sequences are accepted as well.

    binsize : float
        The phase bin size to use.

    minbin : int
        A bin is only kept if it holds strictly more than this many points.

    Returns
    -------

    (binnedphases, binnedmags) : tuple of np.array
        The per-bin median phases and median mags of the kept bins, in
        increasing bin order. Both arrays are empty if no bin qualifies.

    '''

    phases = np.asarray(phases)
    mags = np.asarray(mags)

    bins = np.arange(0.0, 1.0, binsize)
    binnedphaseinds = npdigitize(phases, bins)

    binnedphases, binnedmags = [], []

    for x in npunique(binnedphaseinds):

        thisbin_inds = binnedphaseinds == x
        thisbin_phases = phases[thisbin_inds]
        thisbin_mags = mags[thisbin_inds]

        # thisbin_inds is a boolean mask as long as the whole series, so its
        # .size is the total number of points; the occupancy of this bin is
        # the size of the selected values
        if thisbin_mags.size > minbin:
            binnedphases.append(npmedian(thisbin_phases))
            binnedmags.append(npmedian(thisbin_mags))

    return np.array(binnedphases), np.array(binnedmags)
Beispiel #4
0
def aov_theta(times, mags, errs, frequency,
              binsize=0.05, minbin=9):
    '''Evaluates the Schwarzenberg-Czerny AoV statistic for one frequency.

    The series is folded at period ``1/frequency`` using ``times[0]`` as
    the epoch, the folded mags are grouped into phase bins, and bins with
    more than `minbin` points contribute to two sums taken about the median
    of all folded mags: a between-bin term (bin size times the squared
    offset of the bin median) and a within-bin term (sum of squared
    deviations of the bin members).

    Parameters
    ----------

    times,mags,errs : np.array
        The input time-series and associated errors. `errs` is not used in
        the calculation.

    frequency : float
        The test frequency at which the statistic is evaluated.

    binsize : float
        The width of the phase bins.

    minbin : int
        Only phase bins with strictly more than this many points are used.

    Returns
    -------

    theta_aov : float
        ``s1/s2`` where ``s1`` is the between-bin sum divided by
        ``(goodbins - 1)`` and ``s2`` is the within-bin sum divided by
        ``(ndets - goodbins)``; ``ndets`` counts all folded points and
        ``goodbins`` the bins that passed the `minbin` cut.

    '''

    test_period = 1.0/frequency
    epoch = times[0]

    folded = phase_magseries(times, mags, test_period, epoch,
                             wrap=False, sort=True)

    folded_phases = folded['phase']
    folded_mags = folded['mags']
    bin_edges = nparange(0.0, 1.0, binsize)
    total_ndet = folded_phases.size

    bin_labels = npdigitize(folded_phases, bin_edges)

    # reference level for both sums
    overall_median = npmedian(folded_mags)

    between_terms = []
    within_terms = []

    for label in npunique(bin_labels):

        members = folded_mags[bin_labels == label]
        npoints = members.size

        # sparsely populated bins do not contribute
        if npoints <= minbin:
            continue

        # between-bin contribution
        median_offset = npmedian(members) - overall_median
        between_terms.append(npoints * median_offset * median_offset)

        # within-bin contribution
        deviations = members - overall_median
        within_terms.append(npsum(deviations * deviations))

    n_good = len(between_terms)

    s1 = npsum(nparray(between_terms))/(n_good - 1.0)
    s2 = npsum(nparray(within_terms))/(total_ndet - n_good)

    return s1/s2
Beispiel #5
0
def stellingwerf_pdm_theta(times,
                           mags,
                           errs,
                           frequency,
                           binsize=0.05,
                           minbin=9):
    '''
    Evaluates the Stellingwerf PDM theta statistic for one test frequency.

    The series is folded at period ``1/frequency`` with ``times[0]`` as the
    epoch and the folded mags are grouped into phase bins. The pooled
    sample variance of the sufficiently populated bins is then divided by
    the sample variance of all folded mags.

    Parameters
    ----------

    times,mags,errs : np.array
        The input time-series and associated errors. `errs` is not used in
        the calculation.

    frequency : float
        The test frequency at which the statistic is evaluated.

    binsize : float
        The width of the phase bins.

    minbin : int
        Only phase bins with strictly more than this many points are used.

    Returns
    -------

    theta_pdm : float
        ``sum(var_j * (n_j - 1)) / (sum(n_j) - M)`` over the ``M`` kept
        bins, divided by the ``ddof=1`` variance of all folded mags.


    '''

    test_period = 1.0 / frequency
    epoch = times[0]

    folded = phase_magseries(times, mags, test_period, epoch,
                             wrap=False, sort=True)

    folded_phases = folded['phase']
    folded_mags = folded['mags']
    bin_edges = nparange(0.0, 1.0, binsize)

    bin_labels = npdigitize(folded_phases, bin_edges)

    # one (variance, npoints) pair per bin that passes the minbin cut
    bin_stats = []

    for label in npunique(bin_labels):

        members = folded_mags[bin_labels == label]
        npoints = members.size

        if npoints <= minbin:
            continue

        bin_stats.append((npvar(members, ddof=1), npoints))

    variances = nparray([variance for variance, _ in bin_stats])
    counts = nparray([npoints for _, npoints in bin_stats])
    n_good = len(bin_stats)

    # pooled within-bin variance, normalised by the overall variance
    pooled = npsum(variances * (counts - 1)) / (npsum(counts) - n_good)
    overall = npvar(folded_mags, ddof=1)

    return pooled / overall
Beispiel #6
0
def aov_theta(times, mags, errs, frequency, binsize=0.05, minbin=9):
    '''Compute the Schwarzenberg-Czerny AoV statistic at a test frequency.

    The series is folded at period ``1/frequency`` (epoch ``times[0]``)
    via `phase_magseries` and the folded mags are grouped into phase bins
    of width `binsize`. Bins with more than `minbin` points feed two sums,
    both taken about the median of all folded mags:

    - s1: ``sum(n_bin * (median_bin - median_all)**2) / (goodbins - 1)``
    - s2: ``sum((mag - median_all)**2) / (ndets - goodbins)``

    where ``ndets`` is the number of all folded points and ``goodbins`` the
    number of bins used. The return value is ``s1 / s2``. `errs` is not
    used by the calculation.

    '''

    period = 1.0 / frequency
    fold_time = times[0]

    folded = phase_magseries(times, mags, period, fold_time,
                             wrap=False, sort=True)

    phase_vals = folded['phase']
    mag_vals = folded['mags']
    bin_edges = np.arange(0.0, 1.0, binsize)
    n_points = phase_vals.size

    bin_labels = npdigitize(phase_vals, bin_edges)

    grand_median = npmedian(mag_vals)

    # mags of each occupied bin, restricted to the well-populated bins
    occupied = [mag_vals[bin_labels == label]
                for label in npunique(bin_labels)]
    kept = [group for group in occupied if group.size > minbin]
    n_kept = len(kept)

    # offset of each kept bin's median from the overall median
    offsets = [npmedian(group) - grand_median for group in kept]

    s1_terms = nparray([
        group.size * offset * offset for group, offset in zip(kept, offsets)
    ])
    s2_terms = nparray([
        npsum((group - grand_median) * (group - grand_median))
        for group in kept
    ])

    s1 = npsum(s1_terms) / (n_kept - 1.0)
    s2 = npsum(s2_terms) / (n_points - n_kept)

    return s1 / s2
Beispiel #7
0
def execArisel(y, w, pRegions, inits = 3, initialSolution = [],
               convTabu = 0, tabuLength = 10):
    """Automatic Rationalization with Initial Seed Location

    ARiSeL, proposed by [Duque_Church2004]_ , aggregates N areas into P
    spatially contiguous regions while minimizing intra-regional heterogeneity
    (measured as the within-cluster sum of squares from each area to the
    attribute centroid of its cluster). This algorithm is a modification of
    Openshaw's AZP-tabu [Openshaw_Rao1995]_. In ARISeL the construction of a
    initial feasible solution is repeated several times (inits) before
    running Tabu Search algorithm [Glover1977]_.


    Duque and Church argue that:


        - constructing and initial feasible solution is computationally less
        expensive than performing local search.


        - local search by moving bordering areas between region do not allow
        an extensive search in the solution space and it is computationally
        expensive.


    Based on those two ideas, the authors propose to generate as many
    different initial feasible solutions and run Tabu search on the best
    initial solution obtained so far.


    The initial solution follows a "growing regions" strategy. It starts with
    a initial set of seeds (as many seed as regions) selected using the
    K-means++ algorithm. From those seeds, other neighbouring areas are
    assigned to its closest (in attribute space) growing region. This strategy
    has proven better results. ::

        Layer.cluster('arisel', vars, regions, <wType>, <std>, <inits>,
        <initialSolution>, <convTabu>, <tabuLength>,
        <dissolve>, <dataOperations>)

    :keyword vars: Area attribute(s) (e.g. ['SAR1','SAR2'])
    :type vars: list
    :keyword regions: Number of regions
    :type regions: integer
    :keyword wType: Type of first-order contiguity-based spatial matrix: 'rook'
    or 'queen'. Default value wType = 'rook'.
    :type wType: string
    :keyword std: If = 1, then the variables will be standardized.
    :type std: binary
    :keyword inits: number of initial feasible solutions to be constructed
    before applying Tabu Search.
    :type inits: integer. Default value inits = 5.
    :keyword initialSolution: List with a initial solution vector. It is useful
    when the user wants a solution that is not very different from a preexisting
    solution (e.g. municipalities,districts, etc.). Note that the number of
    regions will be the same as the number of regions in the initial feasible
    solution (regardless the value you assign to parameter "regions").
    IMPORTANT: make sure you are entering a feasible solution and according to
    the W matrix you selected, otherwise the algorithm will not converge.
    :type initialSolution: list
    :keyword convTabu: Stop the search after convTabu nonimproving moves
    (nonimproving moves are those moves that do not improve the current
    solution.
    Note that "improving moves" are different to "aspirational moves").
    If convTabu=0 the algorithm will stop after Int(M/N) nonimproving moves.
    Default value convTabu = 0.
    :type convTabu: integer
    :keyword tabuLength: Number of times a reverse move is prohibited. Default
    value *tabuLength = 10*.
    :type tabuLength: integer
    :keyword dissolve: If = 1, then you will get a "child" instance of the layer
    that contains the new regions. Default value *dissolve = 0*.  **Note:**.
    Each child layer is saved in the attribute *layer.results*.  The first
    algorithm that you run with *dissolve=1* will have a child layer in
    *layer.results[0]*; the second algorithm that you run with *dissolve=1* will
    be in *layer.results[1]*, and so on. You can export a child as a shapefile
    with *layer.result[<1,2,3..>].exportArcData('filename')*
    :type dissolve: binary
    :keyword dataOperations: Dictionary which maps a variable to a list of
    operations to run on it. The dissolved layer will contains in it's data all
    the variables specified in this dictionary. Be sure to check the input
    layer's fieldNames before use this utility.
    :type dataOperations: dictionary

    The dictionary structure must be as showed bellow.

    >>> X = {}
    >>> X[variableName1] = [function1, function2,....]
    >>> X[variableName2] = [function1, function2,....]

    Where functions are strings wich represents the name of the
    functions to be used on the given variableName. Functions
    could be,'sum','mean','min','max','meanDesv','stdDesv','med',
    'mode','range','first','last','numberOfAreas. By deffault just
    ID variable is added to the dissolved map.

    """
    lenY = len(y)
    start = 0.0
    time2 = 0.0

    print "Running original Arisel algorithm"
    print "Number of areas: ", lenY
    if initialSolution:
        print "Number of regions: ", len(npunique(initialSolution))
        pRegions = len(set(initialSolution))
    else:
        print "Number of regions: ", pRegions
    if pRegions >= lenY:
        message = "\n WARNING: You are aggregating "+str(lenY)+" into"+\
        str(pRegions)+" regions!!. The number of regions must be an integer"+\
        " number lower than the number of areas being aggregated"
        raise Exception(message)

    if convTabu <= 0:
        convTabu = lenY/pRegions  #   convTabu = 230*numpy.sqrt(pRegions)
    distanceType = "EuclideanSquared"
    distanceStat = "Centroid"
    objectiveFunctionType = "SS"
    selectionType = "Minimum"
    am = AreaManager(w, y, distanceType)
    extendedMemory = ExtendedMemory()

    pool = Pool(processes = cpu_count())
    procs = []

    start = tm.time()
    for dummy in xrange(inits):
        ans = pool.apply_async(constructPossible, [am, pRegions,
                                                   initialSolution,
                                                   distanceType,
                                                   distanceStat,
                                                   selectionType,
                                                   objectiveFunctionType])
        procs.append(ans)

    results = []
    for p in procs:
        results.append(p.get())

    tmp_ans = extendedMemory
    for rm in results:
        if rm.objInfo < tmp_ans.objInfo:
            tmp_ans = rm
    rm = tmp_ans
    extendedMemory.updateExtendedMemory(rm)

    rm.recoverFromExtendedMemory(extendedMemory)
    print "INITIAL SOLUTION: ", rm.returnRegions(), "\nINITIAL OF: ", rm.objInfo
    rm.tabuMove(tabuLength=tabuLength, convTabu=convTabu)
    time2 = tm.time() - start
    Sol = rm.regions
    Of = rm.objInfo
    print "FINAL SOLUTION: ", Sol, "\nFINAL OF: ", Of
    output = { "objectiveFunction": Of,
               "runningTime": time2,
               "algorithm": "arisel",
               "regions": len(Sol),
               "r2a": Sol,
               "distanceType": distanceType,
               "distanceStat": distanceStat,
               "selectionType": selectionType,
               "ObjectiveFuncionType": objectiveFunctionType}
    return output