def setup():
    """
    Set up the run - 
        Get input arguments
        Create necessary directories
        Generate individual transcript FASTA files
        Create output files
    """
    proteomefile = sys.argv[1]
    directory = f'{sys.argv[2]}/'

    # Deletes output directory if it exists
    if os.path.isdir(directory):
        shutil.rmtree(directory, ignore_errors=True)

    os.makedirs(directory)

    generate_data(proteomefile, directory)

    main_output = f"{directory}/index_hopping_output.txt"

    Path(f"{directory}/hopper.txt").touch()
    Path(f"{directory}/final.txt").touch()
    Path(f"{directory}/never.txt").touch()
    Path(f"{directory}/maybehopper.txt").touch()
    Path(main_output).touch()

    with open(main_output, "a") as fh:
        fh.write(
            'filename  uniquely  multi  totalReads  uniquelyAGAINST  multiAGAINST  totalreadsAGAINST  percRatio\n'
        )

    return directory
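
A minimal usage sketch for the setup() above, assuming the surrounding script is run as "python script.py <proteome_fasta> <output_dir>"; the guard and the print line are illustrative additions, not part of the original example.

if __name__ == "__main__":
    # Hypothetical invocation (assumed): python script.py proteome.fasta results
    # setup() reads sys.argv[1] and sys.argv[2], recreates the output directory,
    # generates the per-transcript FASTA files, and returns the directory path.
    run_directory = setup()
    print(f"Run initialised in {run_directory}")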
Example #2
def get_data(config_data):
    """
    Get training and test data given parameters in the config file.
    Used for constructing the images of "randomized" shapes.
    """
    type = str(config_data["type"])
    n_samples_train = int(config_data["n_samples_train"])
    n_samples_test = int(config_data["n_samples_test"])
    size = int(config_data["size"])

    if type == "2D":

        if config_data["width"] == "random":
            width = "random"
        else:
            width = int(config_data["width"])
        if config_data["noise"] == "False":
            noise_strength = False
        else:
            noise_strength = float(config_data["noise"])
        random_size = bool(config_data["random_size"])
        regular_polygons = bool(config_data["regular_polygons"])
        flatten = bool(config_data["flatten"])

        data_train, labels_train = generate_data(
            n_samples=n_samples_train,
            size=size,
            width=width,
            noise_strength=noise_strength,
            random_size=random_size,
            regular_polygons=regular_polygons,
            flatten=flatten)

        data_test, labels_test = generate_data(
            n_samples=n_samples_test,
            size=size,
            width=width,
            noise_strength=noise_strength,
            random_size=random_size,
            regular_polygons=regular_polygons,
            flatten=flatten)

    elif type == "1D":
        data_train, labels_train = generate_1D_test(size, n_samples_train)
        data_test, labels_test = generate_1D_test(size, n_samples_test)

    return data_train, labels_train, data_test, labels_test, size, type
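
To make the config-driven branches above concrete, here is a hedged sketch of a config_data dictionary for the "2D" case; the key names mirror the lookups in get_data, but every value is an illustrative assumption.

# Illustrative config only -- the keys match the lookups in get_data above,
# the values are assumptions.
example_config = {
    "type": "2D",
    "n_samples_train": 1000,
    "n_samples_test": 200,
    "size": 32,                # image side length, cast with int() above
    "width": "random",         # or an integer line width
    "noise": "0.1",            # the string "False" disables noise entirely
    "random_size": True,
    "regular_polygons": False,
    "flatten": True,
}
# data_train, labels_train, data_test, labels_test, size, shape_type = get_data(example_config)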
Example #3
    def generate_data(self):
        Path(self.data_dir).mkdir(parents=True, exist_ok=True)

        # Remove any existing sample file that does not have the expected length
        ldir = os.listdir(self.data_dir)
        for l in ldir:
            with open(os.path.join(self.data_dir, l), 'r') as f:
                if len(f.read().split('\n')) < (self.data_size + 2):
                    os.remove(os.path.join(self.data_dir, l))

        n_existing_sample = len(os.listdir(self.data_dir))

        Tstruct = self.load_struct()
        ndag = otagr.NamedDAG(Tstruct)

        for i in range(n_existing_sample, self.data_number):
            sample = dg.generate_data(ndag, self.data_size,
                                      self.data_distribution,
                                      **self.data_parameters)
            data_file_name = "sample" + str(i + 1).zfill(2)
            sample.exportToCSVFile(
                os.path.join(self.data_dir, data_file_name) + ".csv", ',')
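
generate_data here is a method, so it leans on several attributes of self; the sketch below is a hypothetical container for those attributes (names taken from the method body, default values assumed), not the project's actual class.

class _DataConfigSketch:
    # Hypothetical attribute holder; only the attribute names come from the method above.
    # A load_struct() method returning the true structure is also assumed by generate_data().
    def __init__(self):
        self.data_dir = "data/samples"       # where the sample CSV files are written
        self.data_size = 1000                # expected number of rows per sample file
        self.data_number = 10                # total number of sample files to keep
        self.data_distribution = "gaussian"  # forwarded to dg.generate_data
        self.data_parameters = {}            # extra keyword arguments for generation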
Example #4
def main(argv):
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    assert FLAGS.min_frequency < 100, "--min_frequency higher than 100."
    assert FLAGS.max_frequency > 15250, "--max_frequency lower than 15250."

    save_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  FLAGS.save_directory)
    if not os.path.exists(save_directory):
        os.mkdir(save_directory)

    # TODO(lauraruis): define this elsewhere
    critical_bands = [
        FLAGS.min_frequency, 100, 200, 300, 400, 505, 630, 770, 915, 1080,
        1265, 1475, 1720, 1990, 2310, 2690, 3125, 3675, 4350, 5250, 6350, 7650,
        9400, 11750, 15250, FLAGS.max_frequency
    ]

    # Generate the data.
    data = data_generation.generate_data(
        num_examples_per_cb=FLAGS.examples_per_critical_band,
        desired_mean=FLAGS.desired_mean,
        desired_variance=FLAGS.desired_variance,
        min_tones=FLAGS.min_tones,
        max_tones=FLAGS.max_tones,
        clip_db=FLAGS.clip_db,
        desired_skewness=FLAGS.skewness_parameter,
        min_frequency=FLAGS.min_frequency,
        max_frequency=FLAGS.max_frequency,
        critical_bands=critical_bands,
        min_phons=FLAGS.min_phons,
        max_phons=FLAGS.max_phons)

    # Run and save some analysis on the generated data.
    (covered_num_tones_per_cb, total_unique_examples,
     total_num_examples_listeners) = data_analysis.save_data(
         data, save_directory, critical_bands)
def data_load(batch_num,
              size,
              shots,
              num_qubits,
              depth,
              max_operands,
              prob_one,
              prob_two,
              clifford,
              device=0,
              train=1):
    loader_ideal = []
    loader_noisy = []
    noise_model, basis_gates = dg.noisy_model(prob_one, prob_two)
    cbasis_gates = ['cx', 'id', 'rz', 'sx', 'x']
    length = []

    for group in range(batch_num):
        if clifford:
            if device:
                cg.generate_data(size, shots, num_qubits, cbasis_gates, device,
                                 group, batch_num, train)
            else:
                data_ideal, data_noisy, sizes = cg.generate_data(
                    size, shots, num_qubits, cbasis_gates, device, group,
                    batch_num, train)
                loader_ideal.append(data_ideal)
                loader_noisy.append(data_noisy)
                length.append(sizes)
        else:
            data_ideal, data_noisy, sizes = dg.generate_data(
                size, shots, num_qubits, depth, max_operands, noise_model,
                basis_gates)
            loader_ideal.append(data_ideal)
            loader_noisy.append(data_noisy)
            length.append(sizes)

    return (loader_ideal, loader_noisy, length)
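
A hedged call sketch for data_load; the argument values are placeholders chosen only to show the shape of the returned tuple, not values from the original experiment.

# Placeholder arguments (assumptions): one batch of non-Clifford random circuits.
loader_ideal, loader_noisy, length = data_load(
    batch_num=1, size=64, shots=1024, num_qubits=4, depth=10,
    max_operands=2, prob_one=0.001, prob_two=0.01,
    clifford=False, device=0, train=1)
# loader_ideal, loader_noisy and length then each hold one entry per batch group.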
Example #6
    config, dir_name = util.load_configuration(cfg_fn)

    params = util.create_param_dict(config)

    df = pd.DataFrame(util.parseParams(params))


    all_predY = None
    all_error = None
    mean_errors = []
    std_errors = []
    for iter_number in range(params['exp_details__num_iterations_per_setting']):
        # generate mobile points, base stations, and angles
        mobiles, bases, angles = data_generation.generate_data(params['data__num_pts'],
                                                               params['data__num_stations'],
                                                               params['data__ndims'],
                                                               pts_r=3., bs_r=4,
                                                               bs_type=params['data__bs_type'],
                                                               points_type=params['data__data_dist'])

        # IMPORTANT: remember to add noise before replicating data (e.g., for snbp-mlp)
        if params['noise__addnoise_train']:
            angles, mobiles = noise_models.add_noise_dispatcher(angles, mobiles,
                                                       params['noise__noise_model'],
                                                       params['data__ndims'],
                                                       base_idxs=params['noise__bases_to_noise'],
                                                       noise_params=params['noise__noise_params'])

        if params['NN__type'] == 'snbp-mlp' or params['NN__type'] == 'smlp':
            rep_idxs = [comb for comb in itertools.combinations(range(params['data__num_stations']),2)]
            angles = data_generation.replicate_data(angles, params['data__ndims'],  rep_idxs)
                          "_data_geom_classification_"+ \
                          "_r_min_%.4f"%(r_min) + \
                          "_r_max_%.4f"%(r_max) + \
                          "_L_%.4f"%(L) + \
                          "_n_samples_" + str(n_samples) + \
                          "_n_samples_per_point_" + str(n_point_samples)

print("Using data in %s" % (dataFile))

# if the file doesn't exist we create it
if not path.exists(dataFile):
    # TODO: encapsulate all this in a function
    print("Data file does not exist, we create a new one")

    labels, \
    points = generate_data(n_samples, n_point_samples, L, r_min, r_max)

    hf = h5py.File(dataFile, 'w')

    hf.create_dataset('labels', data=labels)
    hf.create_dataset('points', data=points)

    hf.close()

# extracting the data
hf = h5py.File(dataFile, 'r')

labels_array = np.array(hf['labels'][:], dtype=np.int32)
points_array = np.array(hf['points'][:], dtype=np.float32)

# Defining the Keras model
Example #8
def main():

    # extras dictionary for importing to functions
    extras = {}

    ###########################################
    #
    #  S  P  I  C  E  C  O  D  E
    #
    ##########################################

    # basic .bsp filename (generic, such as de430, etc)
    extras['basic_bsp'] = 'de430.bsp'
    # .bsp filename for mission
    extras['mission_bsp'] = 'DINO_kernel.bsp'
    # .tls filename
    extras['tls'] = 'naif0011.tls'

    # prep pyswice for the extraction of initial data
    # is the only reason we do this for lines 165 and 166?
    pyswice.furnsh_c(bskSpicePath + 'de430.bsp')
    pyswice.furnsh_c(dinoSpicePath + 'naif0011.tls')
    pyswice.furnsh_c(dinoSpicePath + 'DINO_kernel.bsp')

    DINO_kernel = dinoSpicePath + 'DINO_kernel.bsp'
    body_int = -100  #SP.spkobj(DINO_kernel)
    body_id_str = str(body_int)

    # search_window = pyswice.new_doubleArray(2)
    # pyswice.spkcov_c(DINO_kernel, body_int, search_window)
    # list_of_events = pyswice.wnfetd_c(search_window, 0)
    # tBSP_Start = list_of_events[0]
    # tBSP_End = list_of_events[1]

    ###########################################
    # Initial condition for spacecraft
    # data = io.loadmat('saves/obsData.mat')
    # trueEphemeris = {}
    # reference of sun to sc
    # trueEphemeris['spacecraft'] = np.copy(data['stateS'])
    # # reference of sun to Earth
    # trueEphemeris['S2E'] = np.copy(data['stateE'])
    # # reference of sun to Mars
    # trueEphemeris['S2M'] = np.copy(data['stateM'])

    # time span
    # timeSpan = data['etT'].flatten()
    #Filtering End Epochs
    start_et = pyswice.new_doubleArray(1)
    end_et = pyswice.new_doubleArray(1)
    pyswice.utc2et_c('23 JUL 2020 17:00:00', start_et)
    pyswice.utc2et_c('30 JUL 2020 17:00:00', end_et)

    start_et = pyswice.doubleArray_getitem(start_et, 0)
    end_et = pyswice.doubleArray_getitem(end_et, 0)

    # body vector for SUN, EARTH, MARS
    # CODE RELIES ON SUN BEING INDEXED AS 0
    extras['bodies'] = ['SUN', '3', '399']

    # specify primary and secondary
    extras['primary'] = 0
    extras['secondary'] = [1, 2]

    # respective GP vector
    extras['mu'] = [1.32712428 * 10**11, 3.986004415 * 10**5, 4.305 * 10**4]

    # abcorr for spkzer
    extras['abcorr'] = 'NONE'

    # reference frame
    extras['ref_frame'] = 'J2000'

    # SRP parameter
    # A/M ratio multiplied by solar pressure constant at 1 AU with adjustments
    extras[
        'SRP'] = 0.3**2 / 14. * 149597870.**2 * 1358. / 299792458. / 1000.  # turboprop document Eq (64)

    # coefficient of reflectivity
    extras['cR'] = 1.

    # number of observations per beacon until moving to the next
    extras['repeat_obs'] = 1

    # SNC coefficient
    extras['SNC'] = (2 * 10**(-4))**3

    # Number of batch iterations
    extras['iterations'] = 3

    # Initializing the error
    extras['x_hat_0'] = 0

    # rng seed for debugging purposes
    extras['seed'] = 5

    ##################################################################################
    #
    # Camera/P&L Parameters
    #
    ##################################################################################

    # Focal Length (mm)
    extras['FoL'] = 100.
    angles = []
    extras['DCM_BI'] = np.eye(3)
    extras['DCM_TVB'] = np.eye(3)

    # Camera resolution (pixels)
    extras['resolution'] = [1024., 1024.]

    # width and height of pixels in camera
    extras['pixel_width'] = 5.
    extras['pixel_height'] = 5.

    # direction coefficient of pixel and line axes
    extras['pixel_direction'] = 1.
    extras['line_direction'] = 1.

    # Are we using the real dynamics for the ref or the trueData
    extras['realData'] = 'OFF'

    # Add anomaly detection parameters
    extras['anomaly'] = False
    extras['anomaly_num'] = 0
    extras['anomaly_threshold'] = 4

    ##################################################################################

    # Get Observation Times and Ephemerides. This outputs a full data set that is not
    # parsed in any way. Ephemerides for all objects at all times are given.
    trueEphemeris, timeSpan = dg.generate_data(
        sc_ephem_file=DINO_kernel,
        planet_beacons=['earth', 'mars barycenter'],
        beaconIDs=[],
        n_observations=24,
        start_et=start_et,
        end_et=end_et,
        extras=extras,
        realData=extras['realData'])

    tt_switch = 5

    print '------------------'
    print 'Filter Image Span : ', (timeSpan[-1] - timeSpan[0]) / (60 * 60 *
                                                                  24), 'days'
    print '------------------'

    # number and keys of beacons. note that the true ephem is going to have one spot for the
    # sun, which is NOT a beacon. These are used in beaconBinSPICE.
    beacon_names = trueEphemeris.keys()
    beacon_names.remove('spacecraft')
    extras['unique_beacon_IDs'] = beacon_names
    extras['n_unique_beacons'] = len(beacon_names)

    ##################################################################################
    #
    # BLOCK A page 196
    #
    ##################################################################################

    # copy the initial conditions as the first sun to SC referenceStates from the SPICE file
    IC = np.copy(trueEphemeris['spacecraft'][:, 0])

    print 'IC', IC

    # spice_derived_state is only referenced here. Should these be axed?
    spice_derived_state = pyswice.new_doubleArray(6)
    lt = pyswice.new_doubleArray(1)
    pyswice.spkezr_c(body_id_str, timeSpan[0], 'J2000', 'None', 'Sun',
                     spice_derived_state, lt)

    # a priori uncertainty for the referenceStates
    covBar = np.zeros((IC.shape[0], IC.shape[0]))
    covBar[0, 0] = 10000**2
    covBar[1, 1] = 10000**2
    covBar[2, 2] = 10000**2
    covBar[3, 3] = .1**2
    covBar[4, 4] = .1**2
    covBar[5, 5] = .1**2

    # add uncertainty to the IC
    initialPositionError = 1000 * np.divide(IC[0:3], np.linalg.norm(IC[0:3]))
    initialVelocityError = 0.01 * np.divide(IC[3:6], np.linalg.norm(IC[3:6]))

    IC[0:6] += np.append(initialPositionError, initialVelocityError)

    # uncertainty to be added in the form of noise to the measurables.
    # Takes the form of variance. Currently, the same value is used in both
    # the creation of the measurements as well as the weighting of the filter (W)
    observationUncertainty = np.identity(2)
    observationUncertainty[0, 0] = 0.2**2
    observationUncertainty[1, 1] = 0.2**2

    # the initial STM is an identity matrix
    phi0 = np.identity(IC.shape[0])

    # initiate a priori deviation
    stateDevBar = np.zeros(IC.shape)

    # initiate a filter output dictionary
    filterOutputs = {}

    ##################################################################################
    #
    # Get the noisy observations
    #
    ##################################################################################

    # observation inputs
    observationInputs = (trueEphemeris, observationUncertainty, angles, extras)

    # Get the observation data (dataObservations). This dictionary contains the SPICE data
    # from which values are calculated (key = 'SPICE'), the true observations before
    # uncertainty is added (key = 'truth') and the measured observations (key = 'measurements').
    # These are the 'measurements' values that are now simulating an actual observation,
    # and they are to be processed by the filter.
    # The dictionary also contains the list of beacons by name and order of processing.
    # This list of strings (key = 'beacons') is needed for
    # the filter's own beacon position generator
    dataObservations = getObs(observationInputs)

    # create dictionary for observation data to be inputs in filter. This is a more limited
    # dictionary than dataObservations and serves as the most "real" input
    filterObservations = {}
    filterObservations['measurements'] = dataObservations['measurements']
    filterObservations['beaconIDs'] = dataObservations['beacons']

    ##################################################################################
    #
    # Run the Filter
    #
    ##################################################################################

    # alter the coefficient of reflectivity to be zero. This negates any contribution of
    # modeling SRP
    extras['cR'] = 0.0

    # run the filter and output the referenceStates (including STMs), est states and extra data
    for itr in xrange(extras['iterations']):

        if itr > 0:
            IC = estimatedState[0, :]
            stateDevBar -= extraData['stateDevHatArray'][0, :]

        if itr == 0:
            extras['oldPost'] = np.zeros([len(timeSpan), 2])

        # the arguments for the filter: the IC, the first STM, the time span, the observables
        # data dictionary, a priori uncertainty, and the measurables' uncertainty,
        # as well as any extras
        filterInputs = (IC, phi0, timeSpan, filterObservations,\
                         covBar, observationUncertainty, stateDevBar, angles, extras)
        # run filter function
        referenceState, estimatedState, extraData = run_batch(filterInputs)
        extras['oldPost'] = extraData['postfit residuals']

        # Check for anomaly:

        [anomaly_bool, anomaly_num] = extraData['anomaly_detected']
        if anomaly_bool == True:
            print '**********************************************************'
            print 'Anomaly Detected - Estimates are not to be trusted'
            print '**********************************************************'
            print anomaly_num, 'Residuals out of bounds'
            return

        # save all outputs into the dictionary with a name associated with the iteration
        filterOutputs[str(itr)] = {}
        filterOutputs[str(itr)]['referenceState'] = referenceState
        filterOutputs[str(itr)]['estimatedState'] = estimatedState
        filterOutputs[str(itr)]['extraData'] = extraData

        ##################################################################################
        #
        # \ BLOCK A page 196
        #
        ##################################################################################

        # Iteration Directory
        dirIt = 'Batch_Iteration' + str(itr + 1)

        # Make directory for the iterations
        if not os.path.exists(dirIt):
            os.makedirs(dirIt)

        # File to write data
        writingText( itr+1, referenceState, estimatedState, trueEphemeris, extraData,\
                     initialPositionError , initialVelocityError)

        # calculate the difference between the perturbed reference and
        # true trajectories: reference state errors
        stateError = referenceState[:, 0:6] - trueEphemeris['spacecraft'].T

        # compare the estimated and true trajectories: estimated state errors
        stateErrorHat = estimatedState[:, 0:6] - trueEphemeris['spacecraft'].T

        plotData = extraData

        plotData['postfit delta'] = extraData['postfit changes']
        plotData['states'] = estimatedState
        plotData['truth'] = dataObservations['truth']
        plotData['beacon_list'] = dataObservations['beacons']
        plotData['timeSpan'] = timeSpan
        plotData['dirIt'] = dirIt
        plotData['err'] = stateError
        plotData['stateErrorHat'] = stateErrorHat
        plotData['obs_uncertainty'] = observationUncertainty
        plotData['referenceState'] = referenceState
        plotData['trueEphemeris'] = trueEphemeris
        plotData['extras'] = extras
        plotData['acc_est'] = 'OFF'
        PF(plotData)

        #  Write the output to the pickle file
        fileTag = 'SRP_test'
        file = dirIt + '/' + fileTag + '_data.pkl'
        pklFile = open(file, 'wb')
        pickle.dump(plotData, pklFile, -1)
        pklFile.flush()

        pklFile.close()
Example #9
              ],
)

parsl.set_stream_logger() 
parsl.load(config)

from data_generation import generate_data

proteomefile = sys.argv[1]
directory = f'/home/users/ellenrichards/{sys.argv[2]}/'
threshold = 1000

if not os.path.isdir(directory):
    os.makedirs(directory)

generate_data(proteomefile, directory)

os.system(f"touch {directory}/hopper.txt")
os.system(f"touch {directory}/final.txt")
os.system(f"touch {directory}/never.txt")
os.system(f"touch {directory}/maybehopper.txt")
os.system(f"touch {directory}/index_hopping_output.txt")

#csv_filename = "index_hopping_output.csv"
#csv_fh = open(csv_filename, "write")
#csv_writer = csv.DictWriter(csv_fh, fieldnames=["filename", "uniquely", "multi", "totalReads", "uniquelyAGAINST", "multiAGAINST", "totalReadsAGAINST", "percratio"])
#csv_row = {"filename": "filename ", "uniquely": "uniquely ", "multi": "multi ", "totalReads": "totalreads ", "uniquelyAGAINST": "uniquelyC ", "multiAGAINST": "multiC ", "totalReadsAGAINST": "totalreadsC ", "percratio": "percRatio "}
#csv_writer.writerow(csv_row)
os.system(f"echo 'filename  uniquely  multi  totalReads  uniquelyAGAINST  multiAGAINST  totalreadsAGAINST  percRatio'  >> {directory}/index_hopping_output.txt")

#@python_app(executors=["login"])
def run_experiment(configurations):
    X_train, X_val, X_test, y_train, y_val, y_test = data_generation.generate_data(data_fn=data_generation.sine_2,
                                                                                   nb_samples=nb_samples, seq_len=seq_len,
                                                                                   signal_freq=60., add_noise=False)
    rnn = models.lstm_rnn_gru(input_size=input_size, hidden_size=hidden_size, cell_type="lstm").cuda()
    for module in rnn.modules():
        print module
    loss_fn = nn.MSELoss()
    optimizer = optim.RMSprop(rnn.parameters(), lr=0.00001, momentum=0.9)
    """
    Training with ground truth -- The input is the ground truth
    """
    try:
        val_loss_list = []
        for epoch in range(nb_epochs_mainTraining):
            training_loss = 0
            val_loss = 0
            rnn.train(True)
            for batch, i in enumerate(range(0, X_train.size(0) - 1, batch_size)):
                data, targets = data_generation.get_batch(X_train, y_train, i, batch_size=batch_size)
                output = rnn(data) # This is the original, training with ground truth only
                # output = rnn(data[:, :100], future=400)  # Here, trying to use the model output as part of the input
                optimizer.zero_grad()
                loss = loss_fn(output, targets)
                loss.backward()
                optimizer.step()
                training_loss += loss.data[0]
            training_loss /= batch
            rnn.train(False)
            for batch, i in enumerate(range(0, X_val.size(0) - 1, batch_size)):
                data, targets = data_generation.get_batch(X_val, y_val, i, batch_size=batch_size)
                output = rnn(data)
                loss = loss_fn(output, targets)
                val_loss += loss.data[0]
            val_loss /= batch
            val_loss_list.append(val_loss)
            print "Ground truth - Epoch " + str(epoch) + " -- train loss = " + str(training_loss) + " -- val loss = " + str(val_loss)

            # Early stopping condition -- when the last 4 epochs results in a validation error < 0.015
            cond_true = False
            if len(val_loss_list) >= 4:
                cond_true = True
                for i in val_loss_list[-4:]:
                    if i > 0.015:
                        cond_true = False
                        break
            if cond_true == True:
                print "Triggering early stopping criteria - Out of training"
                break
    except KeyboardInterrupt:
        print "Early stopping for the training"

    """
    Measuring the test score -> running the test data on the model
    """
    rnn.train(False)
    test_loss = 0
    list1 = []
    list2 = []
    for batch, i in enumerate(range(0, X_test.size(0) - 1, batch_size)):
        data, targets = data_generation.get_batch(X_test, y_test, i, batch_size=batch_size)
        output = rnn(data)
        loss = loss_fn(output, targets)
        test_loss += loss.data[0]
        target_last_point = torch.squeeze(targets[:, -1]).data.cpu().numpy().tolist()
        pred_last_point = torch.squeeze(output[:, -1]).data.cpu().numpy().tolist()
        list1 += target_last_point
        list2 += pred_last_point
    plt.figure()
    plt.plot(list1, "b")
    plt.plot(list2, "r")
    plt.legend(["Original data", "Generated data"])
    plt.show()
    test_loss /= batch
    print "Test loss = ", test_loss

    """
    Generating sequences - attempt 1 --> Exactly like "time sequence prediction" example
    """
    data = X_test[0, :].view(1, -1)
    # output = rnn(data[:, :100], future=future_steps)
    output = rnn(data, future=future_steps)
    output = torch.squeeze(output).data.cpu().numpy()
    plt.figure()
    plt.plot(output)
    plt.xlabel("Time step")
    plt.ylabel("Signal amplitude")
    plt.show()
Example #11
 def generate_data(self):
     self.input, self.target = data_generation.generate_data(self.dataset_type, self.no_of_samples, self.no_of_classes)
Example #12
    target_posterior_y = rslds.smooth(target_posterior_x, target)

    return {
        'training_elbos': q_elbos_lem,
        'input_xhat': xhat_lem,
        'input_zhat': zhat_lem,
        'target_elbos': target_elbos,
        'target_posterior': target_posterior_y
    }


if __name__ == "__main__":
    # sample from Lorenz system
    batch_size = 10
    t_steps = 1000
    data = generate_data(batch_size, t_steps)
    inputs, targets = input_and_target(data)

    # fit an slds model
    res = fit_slds(inputs[0], targets[0])

    fig, axs = plt.subplots(nrows=3, ncols=1)
    axs[0].plot(res['training_elbos'], label='Training ELBO')
    axs[0].set_xlabel('iteration')
    axs[0].set_ylabel('ELBO')

    axs[1].plot(res['target_elbos'], label='Prediction ELBO')
    axs[1].set_xlabel('iteration')
    axs[1].set_ylabel('ELBO')

    axs[2].plot(targets[0][:, 0], c='b', label='true target')
Example #13
ds_size = 10000
distribution = 'student'
restarts = 20

S = list(range(1000, 10100, 100))

names = ['X', 'Y']

dag = gum.DAG()
dag.addNodes(2)
# dag.addArc(0,1)

ndag = otagrum.NamedDAG(dag, names)

D = [dg.generate_data(ndag, ds_size, distribution, r=0.8) for _ in range(restarts)]

I = []
for size in S:
    print("Size: ", size)
    info = 0
    for i,data in enumerate(D):
        print("Restart: ", i+1)
        cmi = otagrum.CorrectedMutualInformation(data[:size])
        cmi.setKMode(otagrum.CorrectedMutualInformation.KModeTypes_NoCorr)
        info += cmi.compute2PtCorrectedInformation(0, 1)
    I.append(info/restarts)

plt.plot(S, I)
plt.show()
Example #14
def test_generation(args):
    global random_data
    random_data = generate_data(args)
Example #15
parser.add_argument('--sigmoid', type=str, default=None)
parser.add_argument('--tanh', type=str, default=None)
parser.add_argument('--warmup', type=int, default=10)
parser.add_argument('--optim', type=str, default='Adam_HD')
parser.add_argument('--seed', type=int, default=None)
parser.add_argument('--verbose', action='store_true')
args = parser.parse_args()
print(args)

if args.seed is not None:
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

X_train, X_val, X_test, y_train, y_val, y_test = data_generation.generate_data(
    data_fn=args.data_fn,
    batch_size=args.batch_size,
    length=args.length,
    add_noise=args.add_noise)

rnn = models.Model(input_size=X_train.size(-1),
                   layers=args.layers,
                   output_size=y_train.size(-1),
                   sigmoid=args.sigmoid,
                   tanh=args.tanh)
print(rnn)
print(sum([p.numel() for p in rnn.parameters() if p.requires_grad]),
      "trainable parameters")

loss_fn = nn.MSELoss()
if hasattr(custom_optim, args.optim):
    optimizer = getattr(custom_optim, args.optim)(rnn.parameters())
    placeholder_input = tf.placeholder(tf.complex64,
                                       shape=(BATCH_SIZE, WINDOW_PIXEL_NUM))

    onn_measurement = mmm.inference_testing(placeholder_input, save_mask_phase,
                                            save_mask_amp, save_mask_holes)

    #    tf.gfile.MakeDirs(SENSOR_SAVING_PATH)

    sess = tf.InteractiveSession()

    count = 0

    for step in range(MAX_STEPS):

        testing_input, testing_gt, testing_gt_1 = dtg.generate_data('testing')
        onn_measurement_value_test = sess.run(
            onn_measurement, feed_dict={placeholder_input: testing_input})

        save_measurement = np.reshape(onn_measurement_value_test, (M, N))
        if (OBJECT_AMPLITUDE_INPUT):

            save_input = np.reshape(np.real(testing_input), (M, N))

        else:

            save_input = (np.reshape(np.angle(testing_input),
                                     (M, N)) + np.pi) / 2 / np.pi

        save_input = save_input[M//2-OBJECT_ROW//2:M//2-OBJECT_ROW//2+OBJECT_ROW,\
                                N//2-OBJECT_COL//2:N//2-OBJECT_COL//2+OBJECT_COL]
Example #17
        "FN", "TP", "R2", "DEMEAN"
    ])
    record_i = 0

    # %% settings
    n = 1600
    split_ratio = 1000 / 1600
    sample_size = 10  # 10 sample size ~= 1 HR
    test_cases = [(2, 20, 0.2)]  # exp_no, d, noise_sigma

    # %%
    for exp_no, d, noise_sigma in test_cases:
        for sample_no in range(sample_size):  # sample size
            for demean in (True, False):
                # %% create data
                data = generate_data(n, d, exp_no, noise_sigma)
                prediction = gp_predict(data, split_ratio, demean=demean)

                # %% calculate scores
                tn, fp, fn, tp = confusion_matrix(prediction["t"],
                                                  prediction["t_hat"]).ravel()
                acc = (tn + tp) / (tn + fp + fn + tp)

                r = np.corrcoef(prediction["te"], prediction["te_hat"])[0, 1]
                r2 = r**2

                # %% add records
                record.loc[record_i] = [
                    n, d, exp_no, noise_sigma, sample_no, acc, tn, fp, fn, tp,
                    r2, demean
                ]
Example #18
def load_create_data(data_type,
                     data_out,
                     is_logging_enabled=True,
                     fn_csv=None,
                     label_nm=None):

    df_train, df_test, dset = None, None, None
    features = None
    if data_type in data_loader_mlab.get_available_datasets() + ['show'] \
       or fn_csv is not None:
        if fn_csv is not None:
            rval, dset = data_loader_mlab.load_dataset_from_csv(
                logger, fn_csv, label_nm)
        else:
            rval, dset = data_loader_mlab.get_dataset(data_type)
        assert rval == 0
        data_loader_mlab.dataset_log_properties(logger, dset)
        if is_logging_enabled:
            logger.info('warning no seed')
        df = dset['df']
        features = dset['features']
        labels = dset['targets']
        nsample = len(df)
        train_ratio = 0.8
        idx = np.random.permutation(nsample)
        ntrain = int(nsample * train_ratio)
        df_train = df.iloc[idx[:ntrain]]
        df_test = df.iloc[idx[ntrain:]]

        col_drop = utilmlab.col_with_nan(df)
        if is_logging_enabled and len(col_drop):
            print('warning: dropping features {}'
                  ', contains nan'.format(col_drop))
            time.sleep(2)

        features = [el for el in features if el not in col_drop]

        x_train = df_train[features].values
        y_train = df_train[labels].values
        x_test = df_test[features].values
        y_test = df_test[labels].values

        g_train, g_test = None, None

        y_train = one_hot_encoder(np.ravel(y_train))
        y_test = one_hot_encoder(np.ravel(y_test))
        if is_logging_enabled:
            logger.info('y: train:{} test:{}'.format(set(np.ravel(y_train)),
                                                     set(np.ravel(y_test))))
    else:
        x_train, y_train, g_train = generate_data(n=train_N,
                                                  data_type=data_type,
                                                  seed=train_seed,
                                                  out=data_out)
        x_test, y_test, g_test = generate_data(n=test_N,
                                               data_type=data_type,
                                               seed=test_seed,
                                               out=data_out)
    if is_logging_enabled:
        logger.info('{} {} {} {}'.format(x_train.shape, y_train.shape,
                                         x_test.shape, y_test.shape))
    return x_train, y_train, g_train, x_test, y_test, \
        g_test, df_train, df_test, dset, features
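
A hedged example of calling load_create_data for a CSV-backed dataset; the file name and label column are placeholders, and a data_type outside the known datasets with fn_csv=None would instead route to the generate_data branch.

# Hypothetical call; 'my_dataset.csv' and 'target' are placeholders, not real inputs.
(x_train, y_train, g_train, x_test, y_test,
 g_test, df_train, df_test, dset, features) = load_create_data(
    data_type='csv', data_out=None, fn_csv='my_dataset.csv', label_nm='target')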
Example #19
def main():

    # extras dictionary for importing to functions
    extras = {}

    ###########################################
    #
    #  S  P  I  C  E  C  O  D  E
    #  
    ##########################################

    # basic .bsp filename (generic, such as de430, etc)
    extras['basic_bsp']   = 'de430.bsp'
    # .bsp filename for mission
    extras['mission_bsp'] = 'DINO_kernel.bsp'
    # .tls filename 
    extras['tls']         = 'naif0011.tls'

    # prep pyswice for the extraction of initial data
    # is the only reason we do this for lines 165 and 166?
    pyswice.furnsh_c(bskSpicePath  + 'de430.bsp')
    pyswice.furnsh_c(dinoSpicePath + 'naif0011.tls')
    pyswice.furnsh_c(dinoSpicePath + 'DINO_kernel.bsp')

    DINO_kernel = dinoSpicePath + 'DINO_kernel.bsp'
    body_int = -100  # SP.spkobj(DINO_kernel)
    body_id_str = str(body_int)

    # search_window = pyswice.new_doubleArray(2)
    # pyswice.spkcov_c(DINO_kernel, body_int, search_window)
    # list_of_events = pyswice.wnfetd_c(search_window, 0)
    # tBSP_Start = list_of_events[0]
    # tBSP_End = list_of_events[1]

    ###########################################
    # Initial condition for spacecraft
    # data = io.loadmat('saves/obsData.mat')
    # trueEphemeris = {}
    # reference of sun to sc
    # trueEphemeris['spacecraft'] = np.copy(data['stateS'])
    # # reference of sun to Earth
    # trueEphemeris['S2E'] = np.copy(data['stateE'])
    # # reference of sun to Mars
    # trueEphemeris['S2M'] = np.copy(data['stateM'])

    # time span
    # timeSpan = data['etT'].flatten()
    #Filtering End Epochs
    start_et = pyswice.new_doubleArray(1)
    end_et = pyswice.new_doubleArray(1)
    pyswice.utc2et_c('23 JUL 2020 17:00:00', start_et)
    pyswice.utc2et_c('30 JUL 2020 17:00:00', end_et)

    start_et = pyswice.doubleArray_getitem(start_et, 0)
    end_et   = pyswice.doubleArray_getitem(end_et, 0)



    # body vector for SUN, EARTH, MARS
    # CODE RELIES ON SUN BEING INDEXED AS 0
    extras['bodies'] = ['SUN', '3', '399']

    # specify primary and secondary
    extras['primary'] = 0
    extras['secondary'] = [1, 2]

    # respective GP vector
    extras['mu'] = [1.32712428 * 10 ** 11, 3.986004415 * 10 ** 5, 4.305 * 10 ** 4]

    # abcorr for spkzer
    extras['abcorr'] = 'NONE'

    # reference frame
    extras['ref_frame'] = 'J2000'

    # SRP parameter
    # A/M ratio multiplied by solar pressure constant at 1 AU with adjustments
    extras['SRP'] = 0.3**2/14. * 149597870.**2 * 1358. / 299792458. / 1000. # turboprop document Eq (64)

    # coefficient of reflectivity
    extras['cR'] = 1.

    # number of observations per beacon until moving to the next
    extras['repeat_obs'] = 1

    # SNC coefficient
    extras['SNC'] = (2 * 10 ** (-4)) ** 3

    # Number of batch iterations
    extras['iterations'] = 3

    # Initializing the error
    extras['x_hat_0'] = 0

    # rng seed for debugging purposes
    extras['seed'] = 5


    ##################################################################################
    #
    # Camera/P&L Parameters
    #
    ##################################################################################

    # Focal Length (mm)
    extras['FoL'] = 100.
    angles = []
    extras['DCM_BI'] = np.eye(3)
    extras['DCM_TVB'] = np.eye(3)

    # Camera resolution (pixels)
    extras['resolution'] = [1024., 1024.]

    # width and height of pixels in camera
    extras['pixel_width'] = 5.
    extras['pixel_height'] = 5.

    # direction coefficient of pixel and line axes
    extras['pixel_direction'] = 1.
    extras['line_direction'] = 1.

    # Are we using the real dynamics for the ref or the trueData
    extras['realData']= 'OFF'

    # Add anomaly detection parameters
    extras['anomaly']= False
    extras['anomaly_num'] = 0
    extras['anomaly_threshold'] = 4

    ##################################################################################

    # Get Observation Times and Ephemerides. This outputs a full data set that is not
    # parsed in any way. Ephemerides for all objects at all times are given.
    trueEphemeris, timeSpan = dg.generate_data(sc_ephem_file=DINO_kernel,
                                          planet_beacons = ['earth','mars barycenter'],
                                          beaconIDs=[],
                                          n_observations=24,
                                          start_et=start_et,
                                          end_et=end_et,
                                          extras = extras,
                                          realData = extras['realData'])

    tt_switch = 5

    print '------------------'
    print 'Filter Image Span : ' , (timeSpan[-1] - timeSpan[0])/(60*60*24), 'days'
    print '------------------'


    # number and keys of beacons. note that the true ephem is going to have one spot for the
    # sun, which is NOT a beacon. These are used in beaconBinSPICE.
    beacon_names = trueEphemeris.keys()
    beacon_names.remove('spacecraft')
    extras['unique_beacon_IDs'] = beacon_names
    extras['n_unique_beacons'] = len(beacon_names)

    ##################################################################################
    #
    # BLOCK A page 196
    #
    ##################################################################################

    # copy the initial conditions as the first sun to SC referenceStates from the SPICE file
    IC = np.copy(trueEphemeris['spacecraft'][:, 0])

    ######################################
    # UNMODELED ACCELERATION TERMS
    # ALPHA IMPLEMENTATION
    # CURRENTLY TOO MUCH HARD CODING
    ######################################
    IC = np.append( IC, np.array( [0, 0, 0] ) )

    print 'IC', IC

    # spice_derived_state is only referenced here. Should these be axed?
    spice_derived_state = pyswice.new_doubleArray(6)
    lt = pyswice.new_doubleArray(1)
    pyswice.spkezr_c(body_id_str, timeSpan[0], 'J2000', 'None', 'Sun', spice_derived_state, lt)

    
    # a priori uncertainty for the referenceStates
    covBar = np.zeros((IC.shape[0], IC.shape[0]))
    covBar[0, 0] = 10000**2
    covBar[1, 1] = 10000**2
    covBar[2, 2] = 10000**2
    covBar[3, 3] = .1**2
    covBar[4, 4] = .1**2
    covBar[5, 5] = .1**2
    covBar[6, 6] = (10**(-8))**2
    covBar[7, 7] = (10**(-8))**2
    covBar[8, 8] = (10**(-8))**2

    # add uncertainty to the IC
    initialPositionError = 1000 * np.divide(IC[0:3], np.linalg.norm(IC[0:3]))
    initialVelocityError = 0.01 * np.divide(IC[3:6], np.linalg.norm(IC[3:6]))

    IC[0:6] += np.append(initialPositionError, initialVelocityError)

    # uncertainty to be added in the form of noise to the measurables. 
    # Takes the form of variance. Currently, the same value is used in both
    # the creation of the measurements as well as the weighting of the filter (W)
    observationUncertainty = np.identity(2)
    observationUncertainty[0, 0] = 0.2 ** 2
    observationUncertainty[1, 1] = 0.2 ** 2

    # the initial STM is an identity matrix
    phi0 = np.identity(IC.shape[0])

    # initiate a priori deviation
    stateDevBar = np.zeros(IC.shape)

    # initiate a filter output dictionary
    filterOutputs = {}

    ##################################################################################
    #
    # Get the noisy observations
    #
    ##################################################################################
        
    # observation inputs
    observationInputs = (trueEphemeris, observationUncertainty, angles, extras)

    # Get the observation data (dataObservations). This dictionary contains the SPICE data
    # from which values are calculated (key = 'SPICE'), the true observations before
    # uncertainty is added (key = 'truth') and the measured observations (key = 'measurements').
    # These are the 'measurements' values that are now simulating an actual observation, 
    # and they are to be processed by the filter. 
    # The dictionary also contains the list of beacons by name and order of processing. 
    # This list of strings (key = 'beacons') is needed for 
    # the filter's own beacon position generator
    dataObservations = getObs(observationInputs)

    # create dictionary for observation data to be inputs in filter. This is a more limited
    # dictionary than dataObservations and serves as the most "real" input
    filterObservations = {}
    filterObservations['measurements'] = dataObservations['measurements']
    filterObservations['beaconIDs']    = dataObservations['beacons']

    ##################################################################################
    #
    # Run the Filter
    #
    ##################################################################################

    # alter the coefficient of reflectivity to be zero. This negates any contribution of
    # modeling SRP
    extras['cR'] = 0.0

    # run the filter and output the referenceStates (including STMs), est states and extra data
    for itr in xrange(extras['iterations']):

        if itr > 0:
            # IC = est_state[0, :]
            IC += extraData['stateDevHatArray'][0, :]
            stateDevBar -= extraData['stateDevHatArray'][0, :]

        # the arguments for the filter are the IC, the first STM, the time span, the observables
        # data dictionary, a priori uncertainty, and the measurables' uncertainty,
        # as well as any extras
        if itr==0:
            extras['oldPost'] = np.zeros([len(timeSpan), 2])

        filterInputs = (IC, phi0, timeSpan, filterObservations,\
                         covBar, observationUncertainty, stateDevBar, angles, extras)
        # run filter function
        referenceState, estimatedState, extraData = run_batch(filterInputs)

        extras['oldPost'] = extraData['postfit residuals']
        # save all outputs into the dictionary with a name associated with the iteration
        filterOutputs[str(itr)] = {}
        filterOutputs[str(itr)]['referenceState'] = referenceState
        filterOutputs[str(itr)]['estimatedState'] = estimatedState
        filterOutputs[str(itr)]['extraData'] = extraData

        ##################################################################################
        #
        # \ BLOCK A page 196
        #
        ##################################################################################

        # Iteration Directory
        dirIt = 'Batch_Iteration' + str(itr+1)

        # Make directory for the iterations
        if not os.path.exists(dirIt):
            os.makedirs(dirIt)

        # File to write data
        writingText(itr+1, referenceState, estimatedState, trueEphemeris, extraData, initialPositionError , initialVelocityError)

        # calculate the difference between the perturbed reference and true trajectories: reference state errors
        stateError = referenceState[:, 0:6] - trueEphemeris['spacecraft'].T

        # compare the estimated and true trajectories: estimated state errors
        stateErrorHat = estimatedState[:, 0:6] - trueEphemeris['spacecraft'].T

        plotData = extraData

        plotData['postfit delta']   = extraData['postfit changes']
        plotData['states']          = estimatedState
        plotData['truth']           = dataObservations['truth']
        plotData['beacon_list']     = dataObservations['beacons']
        plotData['timeSpan']        = timeSpan
        plotData['dirIt']           = dirIt
        plotData['err']             = stateError
        plotData['stateErrorHat']   = stateErrorHat
        plotData['obs_uncertainty'] = observationUncertainty
        plotData['referenceState']  = referenceState
        plotData['trueEphemeris']   = trueEphemeris
        plotData['extras']          = extras
        plotData['acc_est']         = 'ON'
        PF( plotData )

        #  Write the output to the pickle file
        fileTag = 'SRP_test'
        file = dirIt+'/'+fileTag+'_data.pkl'
        pklFile = open( file, 'wb')
        pickle.dump( plotData, pklFile, -1 )
        pklFile.flush()

        pklFile.close()

    [anomaly_bool , anomaly_num] = extraData['anomaly_detected']
    if anomaly_bool == True:
        print '**********************************************************'
        print 'Anomaly Detected - Estimates are not to be trusted'
        print '**********************************************************'
        print anomaly_num, 'Residuals out of bounds'




    return
Example #20
    params = util.create_param_dict(config)

    df = pd.DataFrame(util.parseParams(params))

    all_predY = None
    all_error = None
    mean_errors = []
    std_errors = []
    for iter_number in range(
            params['exp_details__num_iterations_per_setting']):
        # generate mobile points, base stations, and angles
        mobiles, bases, angles = data_generation.generate_data(
            params['data__num_pts'],
            params['data__num_stations'],
            params['data__ndims'],
            pts_r=3.,
            bs_r=4,
            bs_type=params['data__bs_type'],
            points_type=params['data__data_dist'])

        # IMPORTANT: remember to add noise before replicating data (e.g., for snbp-mlp)
        if params['noise__addnoise_train']:
            angles, mobiles = noise_models.add_noise_dispatcher(
                angles,
                mobiles,
                params['noise__noise_model'],
                params['data__ndims'],
                base_idxs=params['noise__bases_to_noise'],
                noise_params=params['noise__noise_params'])

        if params['NN__type'] == 'snbp-mlp' or params['NN__type'] == 'smlp':
import numpy as np
import matplotlib.pyplot as plt

import data_generation
import models

batch_size = 64
seq_len = 100  # This is equivalent to time steps of the sequence in keras
input_size = 1
hidden_size = 51
target_size = 1
nb_samples = 1000
nb_epochs_mainTraining = 2000
nb_epochs_fineTuning = 200

X_train, X_val, X_test, y_train, y_val, y_test = data_generation.generate_data(
    data_fn=data_generation.sine_2, nb_samples=nb_samples, seq_len=seq_len)
rnn = models.lstm_rnn_gru(input_size=input_size,
                          hidden_size=hidden_size,
                          cell_type="lstm").cuda()
for module in rnn.modules():
    print module
loss_fn = nn.MSELoss()
optimizer = optim.RMSprop(rnn.parameters(), lr=0.00001, momentum=0.9)
# optimizer = optim.SGD(rnn.parameters(), lr=0.000003, momentum=0.95)
# optimizer = optim.LBFGS(rnn.parameters())
# optimizer = optim.Adam(rnn.parameters(), lr=0.00001)
# optimizer = optim.Adagrad(rnn.parameters(), lr=0.0001)
"""
Training with ground truth -- The input is the ground truth
"""
try:

# Synthetic or leukemia dataset
dataset = "leukemia"

if dataset == "synthetic":
    # Generate data set
    n_samples = 100
    n_features = 200
    sigma = 1.
    sparsity = 0.9
    corr = 0.5
    random_state = np.random.randint(0, 100)

    X, y, true_beta, true_sigma = generate_data(n_samples, n_features, sigma,
                                                sparsity, corr,
                                                random_state=random_state)

if dataset == "leukemia":

    data = fetch_mldata('leukemia')
    X = data.data
    y = data.target
    X = X.astype(float)
    y = y.astype(float)
    n_samples, n_features = X.shape

NO_SCREENING = 0
GAPSAFE = 1
WSTRT_SIGMA_0 = 2
BOUND = 3
Example #23
Tstruct_file = structure + ".txt"
struct_directory = "../../data/structures/"

data_directory = path.join(data_directory, structure)
if not path.isdir(data_directory):
    os.mkdir(data_directory)

if args.distribution == "gaussian" or args.distribution == "student":
    r_subdir = 'r' + str(args.correlation).replace('.', '')
    data_directory = path.join(data_directory, r_subdir)
    if not path.isdir(data_directory):
        os.mkdir(data_directory)

# If the existing samples do not have the expected length, remove them all
ldir = os.listdir(data_directory)
if ldir:
    with open(path.join(data_directory, ldir[0]), 'r') as f:
        if len(f.read().split('\n')) != (sample_size + 2):
            for l in ldir:
                os.remove(path.join(data_directory, l))
            
n_existing_sample = len(os.listdir(data_directory))

Tstruct = load.load_struct(path.join(struct_directory, Tstruct_file))
ndag = otagr.NamedDAG(Tstruct)

for i in range(n_existing_sample, n_sample):
    sample = dg.generate_data(ndag, sample_size, args.distribution, correlation)
    sample.exportToCSVFile(path.join(data_directory, data_file_name) + \
                           '_' + str(i+1).zfill(2) + ".csv", ',')