def CCMGetDistances(libraryMatrix, args):
    '''
    Compute all pairwise distances between library rows for CCM.

    Note that for CCM the libraryMatrix and predictionMatrix are the same.

    libraryMatrix : 2-D array; column 0 is excluded from the distance
                    (the original comments identify it as time) and
                    columns 1 ... args.E form the E-dimensional vector.
    args          : object providing the embedding dimension args.E.

    Return Distances: a square (N_row x N_row) matrix.  Element D[i,j]
    holds Distance() between the E-dimensional phase space vectors of
    rows (observations) i and j.  The matrix is symmetric, D[i,j] = D[j,i].
    The diagonal is left at the 1E30 fill value (presumably so that an
    observation is never selected as its own nearest neighbor).
    '''
    N_row = nRow(libraryMatrix)
    D = np.full((N_row, N_row), 1E30)  # Distance matrix init to 1E30
    E = args.E + 1  # exclusive upper column bound of the slice 1:E

    for row in range(N_row):
        # E-dimensional vector of this library row, without column 0
        y = libraryMatrix[row, 1:E]

        # Only the strict upper triangle is computed; the range must run
        # through the last row (N_row - 1), otherwise every distance to
        # the final observation would remain at the 1E30 fill value.
        for col in range(row + 1, N_row):
            D[row, col] = Distance(libraryMatrix[col, 1:E], y)
            # Mirror into the lower triangle since D[i,j] = D[j,i]
            D[col, row] = D[row, col]

    return D
def CrossMap(args):
    '''
    Pool worker function called from CCM().

    Embeds the data selected by args, builds the full row-to-row distance
    matrix once, then for every library size in args.libsize
    ( start, stop, increment ) runs SimplexProjection() on one or more
    library samples and averages the error statistics.

    Returns a tuple ( ID, stats ) where ID is the string
    "<columns> to <target>" and stats is a dict
    { lib_size : ( mean rho, mean rmse, mean mae ) }.
    '''
    # Embedding of the data to be cross mapped (-c column).  The whole
    # embedding serves as both library and prediction set.
    embedding, _, target = EmbedData(args)
    library = embedding
    n_obs = nRow(library)

    first, last, step = args.libsize

    # Random libraries are drawn args.subsample times;
    # a contiguous library is evaluated once.
    n_samples = args.subsample if args.randomLib else 1

    # Simplex: if k_NN not specified, set k_NN to E + 1
    if args.k_NN < 0:
        args.k_NN = args.E + 1
        if args.verbose:
            print("CCM() Set k_NN to E + 1 = " + str(args.k_NN) +
                  " for SimplexProjection.")

    print("CCM(): Simplex cross mapping from " + str(args.columns) +
          " to " + args.target + " E=" + str(args.E) +
          " k_nn=" + str(args.k_NN) +
          " Library range: [{}, {}, {}]".format(first, last, step))

    # Distances between all row pairs of the embedding
    Distances = CCMGetDistances(library, args)

    stats = {}  # { lib_size : ( rho, rmse, mae ) }

    for lib_size in range(first, last + 1, step):
        if args.Debug:
            print("CCM(): lib_size " + str(lib_size))

        # One row of ( rho, rmse, mae ) per sample
        sample_stats = np.zeros((n_samples, 3))

        for n in range(n_samples):
            if args.randomLib:
                # Uniform random sample of rows, with replacement
                lib_i = randint(low=0, high=n_obs, size=lib_size)
            elif lib_size >= n_obs:
                # Requested library exceeds the data: use every row
                lib_i = np.arange(0, n_obs)
                if args.warnings or args.verbose:
                    print("CCM(): max lib_size is {}, "
                          "lib_size has been limited.".format(n_obs))
            elif n + lib_size < n_obs:
                # Contiguous block starting at row n
                lib_i = np.arange(n, n + lib_size)
            else:
                # Block runs past the last row: wrap around to row 0
                wrap_len = min(lib_size - (n_obs - n), n_obs)
                lib_i = np.concatenate(
                    (np.arange(n, n_obs), np.arange(0, wrap_len)), axis=0)

            # k_NN nearest neighbors restricted to the sampled rows
            neighbors, distances = CCMGetNeighbors(Distances, lib_i, args)

            predictions = SimplexProjection(library[lib_i, :],
                                            target[lib_i],
                                            neighbors, distances, args)

            rho, rmse, mae = ComputeError(target[lib_i], predictions)
            sample_stats[n, :] = (rho, rmse, mae)

        # Column-wise means over the samples, in ( rho, rmse, mae ) order
        stats[lib_size] = tuple(np.mean(sample_stats[:, k])
                                for k in range(3))

    # Return tuple with ( ID, stats{} )
    return (str(args.columns) + " to " + args.target, stats)
def _MultiviewCombos(nVar, E):
    '''
    Return the E-at-a-time combinations of embedding columns that contain
    at least one unlagged (zero time lag) coordinate.

    Column 0 of the embedding is time, so the candidate columns are
    1 ... nVar * E.  The unlagged data are expected in columns
    1, 1 + E, 1 + 2E, ... (the layout the calling code attributes to
    EmbedData() output), i.e. columns x with (x - 1) % E == 0.  Written
    this way the test also holds for E = 1, where every column is
    unlagged; the form x % E == 1 can never be true when E = 1.
    '''
    return [combo for combo in combinations(range(1, nVar * E + 1), E)
            if any((x - 1) % E == 0 for x in combo)]


def _MultiviewEvaluate(args, combos, embedding, colNames, target):
    '''
    Rank the candidate embeddings in-sample with EvalLib(), then run
    EvalPred() on the top args.multiview of them.

    Returns an OrderedDict keyed by the first item of each EvalPred()
    result tuple, holding the second item, in rank order.  None results
    from the workers are skipped.

    args.prediction is temporarily replaced by args.library for the
    in-sample ranking and is always restored before returning or raising.
    '''
    # Save a copy of the specified prediction observations, then
    # override for in-sample forecast skill evaluation.
    prediction = args.prediction
    args.prediction = args.library

    try:
        # The context manager terminates the worker processes on exit,
        # including when a worker raises.
        with Pool() as pool:
            results = pool.map(
                EvalLib,
                [(args, combo, embedding, colNames, target)
                 for combo in combos])

            # Dict to hold combos : rho pairs from EvalLib() tuple
            Combo_rho = {}
            for result in results:
                if result is None:
                    continue
                Combo_rho[result[0]] = result[1]

            if not Combo_rho:
                raise RuntimeError(
                    'Multiview() no embeddings could be evaluated.')

            # Rank the in-sample forecasts, best rho first
            rho_sort, combo_sort = zip(
                *sorted(zip(Combo_rho.values(), Combo_rho.keys()),
                        reverse=True))

            if args.Debug:
                print("Multiview() In sample sorted embeddings:")
                print('Columns ρ')
                for i in range(min(args.multiview, len(combo_sort))):
                    print(str(combo_sort[i]) + " " +
                          str(round(rho_sort[i], 4)))

            # Reset the user specified prediction vector before the
            # out-of-sample predictions are submitted.
            args.prediction = prediction

            # Predictions with the top args.multiview embeddings
            results = pool.map(
                EvalPred,
                [(args, combo, embedding, colNames, target)
                 for combo in combo_sort[0:args.multiview]])
    finally:
        args.prediction = prediction

    Results = OrderedDict()  # Dictionary of dictionaries, one per combo
    for result in results:
        if result is None:
            continue
        Results[result[0]] = result[1]

    return Results


def Multiview(args, source=Source.Python):
    '''
    Data input requires -c (columns) to specify timeseries columns
    in inputFile (-i) that will be embedded by EmbedData(), and the
    -r (target) specifying the data target column in inputFile.

    args.E represents the number of variables to combine for each
    assessment, as well as the number of time delays to create in
    EmbedData() for each variable.

    Prediction() with Simplex sets k_NN equal to E+1 if -k not specified.

    If args.multiview is not set it is assigned max( 2, sqrt( m ) ) where
    m is the number of candidate embeddings.  args.plot, args.outputFile
    and args.prediction are modified while the embeddings are evaluated
    and restored afterwards.

    Returns a dict with keys 'header', 'multiview', 'rho', 'RMSE', 'MAE'
    when source is Source.Jupyter, otherwise None.

    Raises RuntimeError if columns, target or E are not specified, or if
    no embedding produces a result.

    -- Ye H., and G. Sugihara, 2016. Information leverage in
       interconnected ecosystems: Overcoming the curse of dimensionality.
       Science 353:922–925.
    '''
    if not len(args.columns):
        raise RuntimeError('Multiview() requires -c to specify data.')
    if not args.target:
        raise RuntimeError('Multiview() requires -r to specify target.')
    if args.E < 0:
        raise RuntimeError('Multiview() E is required.')

    # Save args.plot and args.outputFile, and disable them so that
    # Prediction() neither plots nor writes while embeddings are evaluated.
    showPlot = args.plot
    outputFile = args.outputFile
    args.plot = False
    args.outputFile = None

    try:
        # Embed data from inputFile
        embedding, colNames, target = EmbedData(args)

        # Combinations of embedding columns, E at-a-time, each with at
        # least one coordinate of observed (zero time lag) data.
        combos = _MultiviewCombos(len(args.columns), args.E)

        if not args.multiview:
            # Ye & Sugihara suggest sqrt( m ) as the number of embeddings
            # to average
            args.multiview = max(2, int(np.sqrt(len(combos))))
            print('Multiview() Set view sample size to ' +
                  str(args.multiview))

        Results = _MultiviewEvaluate(args, combos, embedding,
                                     colNames, target)
    finally:
        # Hand the caller's settings back so args can be reused
        args.plot = showPlot
        args.outputFile = outputFile

    if not Results:
        raise RuntimeError('Multiview() no predictions were generated.')

    # Console output
    print("Multiview() Prediction Embeddings:")
    print("Columns Names ρ mae rmse")
    for key, result in Results.items():
        print(str(key) + " " + ' '.join(result['names']) +
              " " + str(round(result['rho'], 4)) +
              " " + str(round(result['mae'], 4)) +
              " " + str(round(result['rmse'], 4)))

    #----------------------------------------------------------
    # Compute Multiview averaged prediction
    # The output item of Results dictionary is a matrix with three
    # columns [ Time, Data, Prediction_t() ]
    # Time and Data are taken from the first available result rather
    # than from a fixed key, which may be absent if its result was None.
    aresult = next(iter(Results.values()))
    nrows = nRow(aresult['output'])
    time = aresult['output'][:, 0]
    data = aresult['output'][:, 1]

    # Collect the Predictions into a single matrix, one column per result
    M = np.zeros((nrows, len(Results)))
    for col_i, result in enumerate(Results.values()):
        M[:, col_i] = result['output'][:, 2]  # Prediction is in col j=2

    prediction = np.mean(M, axis=1)
    multiview_out = np.column_stack((time, data, prediction))

    # Write output
    header = 'Time,Data,Prediction_t(+{0:d})'.format(args.Tp)
    if outputFile:
        np.savetxt(args.path + outputFile, multiview_out, fmt='%.4f',
                   delimiter=',', header=header, comments='')

    # Estimate correlation coefficient on observed : predicted data
    rho, rmse, mae = ComputeError(data, prediction)

    print(("Multiview() ρ {0:5.3f} RMSE {1:5.3f} "
           "MAE {2:5.3f}").format(rho, rmse, mae))

    #----------------------------------------------------------
    if showPlot:
        Time = multiview_out[:, 0]  # Required to be first (j=0) column
        if args.plotDate:
            Time = num2date(Time)

        fig, ax = plt.subplots(1, 1, figsize=args.figureSize, dpi=150)

        ax.plot(Time, multiview_out[:, 1],
                label='Observations', color='blue', linewidth=2)
        ax.plot(Time, multiview_out[:, 2],
                label='Predictions_t(+{0:d})'.format(args.Tp),
                color='red', linewidth=2)

        if args.verbose:
            # Plot all projections.  Labels come from the Results keys so
            # they stay aligned with the columns of M, and the curves use
            # the same (possibly date converted) Time axis as the others.
            for col, key in enumerate(Results):
                ax.plot(Time, M[:, col], label=key, linewidth=2)

        ax.legend()
        ax.set(xlabel=args.plotXLabel,
               ylabel=args.plotYLabel,
               title="Multiview " + args.inputFile +
                     ' Tp=' + str(args.Tp) +
                     ' E=' + str(args.E) + r' $\rho$=' +
                     str(round(rho, 2)))
        plt.show()

    if source == Source.Jupyter:
        return {
            'header': header,
            'multiview': multiview_out,
            'rho': rho,
            'RMSE': rmse,
            'MAE': mae
        }
    else:
        return