Code example #1
File: data_processing.py Project: lokira/tools
def req_0(dict_G, dict_D, cmd, check_entry_list):
    """
    Get the DUT and golden data and build a check entry for plotting.
        Arguments:
            dict_G - Dictionary containing the golden commands and data.
            dict_D - Dictionary containing the DUT commands and data.
            cmd - The command to search for.
            check_entry_list - List to which the resulting CheckEntry is appended.
    """
    data10 = uti.read_data(dict_D, cmd)
    data10_G = uti.read_data(dict_G, cmd)

    entry = CheckEntry(cmd, CheckEntry.Y)

    if data10_G is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd))
    else:
        entry.load_data_G(data10_G)

    if data10 is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd))
    else:
        entry.load_data(data10)

    if data10 and data10_G and len(data10_G) != len(data10):
        entry.add_err_msg(GD_NOT_MATCH_S)

    check_entry_list.append(entry)
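
Note: examples #1, #4, #5, #10, and #26 in this list rely on a uti.read_data(data_dict, cmd) helper that is not shown here. Judging from how it is used, it simply looks up a command and returns None when the command is missing; a minimal sketch (an assumption, not the actual lokira/tools implementation):

def read_data(data_dict, cmd):
    # Hypothetical sketch: return the stored data list for cmd, or None if the
    # command is not present in the dictionary.
    return data_dict.get(cmd)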
Code example #2
File: sensitivity.py Project: seinecke/pct-tools
def main(gamma_input, proton_input, output, t_obs, flux, ref):
    t_obs *= u.h

    gammas = read_data(gamma_input, weight=True, spectrum='crab', t_obs=t_obs)
    protons = read_data(proton_input,
                        weight=True,
                        spectrum='proton',
                        t_obs=t_obs)

    gammas = add_theta(gammas)
    protons = add_theta(protons)

    bins, bin_centers, bin_widths = make_energy_bins(e_min=0.08 * u.TeV,
                                                     e_max=300 * u.TeV,
                                                     bins=15,
                                                     centering='log')

    rel_sens = calc_relative_sensitivity(gammas,
                                         protons,
                                         bins,
                                         method='exact',
                                         alpha=0.2)

    ax = plot_sensitivity(rel_sens,
                          bins,
                          bin_centers,
                          label=f'This Analysis {t_obs:2.0f}')

    if flux:
        ax = plot_crab_flux(bins, ax)

    if ref:
        ax = plot_ref_sens(ax)

    ax.text(0.95,
            0.95,
            'Differential Sensitivity',
            transform=ax.transAxes,
            horizontalalignment='right',
            verticalalignment='center')

    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlim([1E-2, 10**(2.5)])
    ax.set_ylim([0.8E-13, 2E-10])
    ax.set_ylabel(
        r'$ E^2 \times \mathrm{Flux}\ \mathrm{Sensitivity} \ / \ (\mathrm{erg} \ \mathrm{s}^{-1} \ \mathrm{cm}^{-2}$)'
    )
    ax.set_xlabel(
        r'$\mathrm{Reconstructed}\ \mathrm{Energy}\ E\ /\  \mathrm{TeV}$')
    ax.legend(loc='lower left')

    if output:
        plt.savefig(output)
    else:
        plt.show()
Code example #3
def stack_with_features():
    train = read_data('raw_data/train.csv')
    test = read_data('raw_data/test.csv')

    df_tr = pd.read_csv(agg_loc + agg_name)
    train = pd.merge(train, df_tr, on='card_id', how='left').fillna(0)
    test = pd.merge(test, df_tr, on='card_id', how='left').fillna(0)

    del df_tr

    train = fe.combine_categs(train)
    test = fe.combine_categs(test)

    train = train[
        ['card_id', 'target'] +
        [col for col in train.columns if 'purchase' in col or 'month' in col]]
    test = test[
        ['card_id'] +
        [col for col in train.columns if 'purchase' in col or 'month' in col]]

    print(train.columns)

    stacked = pd.read_csv('results/stack_n_blend/oof_predictions.csv')
    del stacked['Unnamed: 0']
    del stacked['target']
    st_test = pd.read_csv('results/stack_n_blend/all_predictions.csv')

    #stacked = stacked[[col for col in stacked.columns if 'lightGBM_' in col]]
    #st_test = st_test[[col for col in stacked.columns if 'lightGBM_' in col] + ['card_id']]

    train = pd.concat([train, stacked], axis=1)
    test = pd.merge(test, st_test, on='card_id', how='left')

    del train['lightGBM_full']
    del test['lightGBM_full']

    target = train['target']
    id_to_sub = test.card_id
    del train['target']
    del train['card_id']
    del test['card_id']

    kfolds = KFold(10, shuffle=True, random_state=42)

    predictions, cv_score, feat_imp, oof = ms.lightgbm_train(
        train, test, target, kfolds)

    sub_df = pd.DataFrame({"card_id": id_to_sub.values})
    sub_df["target"] = predictions
    sub_df.to_csv(save_loc + 'stacked_with_feats.csv', index=False)
    feat_imp.to_csv(save_loc + "stacked_with_feats_featimp.csv", index=False)

    print(cv_score)
Code example #4
File: data_processing.py Project: lokira/tools
def req_1(dict_G, dict_D, cmd, check_entry_list):
    """
    Get the DUT and golden data and build a check entry for plotting.
        Arguments:
            dict_G - Dictionary containing the golden commands and data.
            dict_D - Dictionary containing the DUT commands and data.
            cmd - The command to search for.
            check_entry_list - List to which the resulting CheckEntry is appended.
    """
    x = []
    x_G = []
    y1 = []
    y1_G = []

    data10 = uti.read_data(dict_D, cmd)
    data10_G = uti.read_data(dict_G, cmd)

    entry = CheckEntry(cmd, CheckEntry.XY)
    """
    Plot Golden
    """
    if data10_G is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd))
    else:
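        # The data list interleaves x and y samples: even indices hold x values,
        # odd indices hold y values (the same layout applies to the DUT data below).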
        for j in range(len(data10_G)):
            if j % 2 == 0:
                x_G.append(data10_G[j])
            else:
                y1_G.append(data10_G[j])
        if not (uti.is_same_len(x_G, y1_G)):
            entry.add_err_msg(XY_NOT_MATCH_S % (GOLDEN_S, cmd))
        else:
            entry.load_data_G([x_G, y1_G])
    """
    Plot DUT
    """
    if data10 is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd))
    else:
        for j in range(len(data10)):
            if j % 2 == 0:
                x.append(data10[j])
            else:
                y1.append(data10[j])
        if not (uti.is_same_len(x, y1)):
            entry.add_err_msg(XY_NOT_MATCH_S % (DUT_S, cmd))
        else:
            entry.load_data([x, y1])

    if data10_G and data10 and len(data10_G) != len(data10):
        entry.add_err_msg(GD_NOT_MATCH_S)

    check_entry_list.append(entry)
Code example #5
File: data_processing.py Project: lokira/tools
def req_2(dict_G, dict_D, cmd_x, check_entry_list):
    """
    Get the DUT and golden data and build a check entry for plotting.
        Arguments:
            dict_G - Dictionary containing the golden commands and data.
            dict_D - Dictionary containing the DUT commands and data.
            cmd_x - The x-data command to search for; the matching y-data
                    command is derived by replacing "_x" with "_y".
            check_entry_list - List to which the resulting CheckEntry is appended.
    """
    """
    Read Data
    """
    data10_x = uti.read_data(dict_D, cmd_x)
    data10_x_G = uti.read_data(dict_G, cmd_x)
    cmd_y = cmd_x.replace("_x", "_y")
    data10_y = uti.read_data(dict_D, cmd_y)
    data10_y_G = uti.read_data(dict_G, cmd_y)

    entry = CheckEntry(cmd_x, CheckEntry.XY)
    """
    Plot Golden
    """
    if data10_x_G is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_x))
    if data10_y_G is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_y))
    if data10_x_G is not None and data10_y_G is not None:
        if not uti.is_same_len(data10_x_G, data10_y_G):
            entry.add_err_msg(XY_NOT_MATCH_S % (GOLDEN_S, cmd_x))
        else:
            entry.load_data_G([data10_x_G, data10_y_G])
    """
    Plot DUT
    """
    if data10_x is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_x))
    if data10_y is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_y))
    if data10_x is not None and data10_y is not None:
        if not uti.is_same_len(data10_x, data10_y):
            entry.add_err_msg(XY_NOT_MATCH_S % (DUT_S, cmd_x))
        else:
            entry.load_data([data10_x, data10_y])

    if uti.is_same_len(entry.get_data(), entry.get_data_G()) and len(
            entry.get_data()):
        if not uti.is_same_len(entry.get_data()[0], entry.get_data_G()[0]):
            entry.add_err_msg(GD_NOT_MATCH_S)

    check_entry_list.append(entry)
Code example #6
def prepare_dataset(config, columns=COMBINED_COL):
    print('Reading data...')
    dataset = utilities.read_data([
        '{}/{}'.format(config['dir']['data'], data['name'])
        for data in config['data']
    ], columns)
    return dataset
Code example #7
    def _load_data(self):
        global G_DATA

        file_path = utilities.get_file()
        #try:
        G_DATA = utilities.read_data(file_path)
        self.data_name = "[b]" + file_path.split("/")[-1].split(
            ".")[-2] + "[/b]"
        self.data_path = utilities.newline_insert(
            "/".join(file_path.split("/")[:-1]), "/", 28)

        for i in range(len(G_DATA.columns)):
            if (i % 2 == 0):
                background = (0.99, 0.99, 0.99, 1)
            else:
                background = (0.95, 0.95, 0.98, 1)

            drag_button = DraggableButton(
                text="   [b]o[/b]   " + G_DATA.columns[i] + " <" +
                str(G_DATA.dtypes[i]) + ">",
                markup=True,
                pos=(325, Window.height - (280 + 20 * i)),
                background_color=background,
                drop_func=self.refurbish,
                droppable_zone_objects=[
                    self.ids.which_color, self.ids.which_filter,
                    self.ids.which_X, self.ids.which_Y,
                    self.ids.which_start_node, self.ids.which_end_node,
                    self.ids.which_edges, self.ids.which_size,
                    self.ids.whole_screen
                ],
                column=G_DATA.columns[i],
                origin=(325, Window.height - (280 + 20 * i)))
            self.add_widget(drag_button)
Code example #8
File: train.py Project: Sinacam/simpleNN
def main():

	full_batch, num_cls, label_enum = read_data(filename=args.train_set, dim=args.dim)
	
	if args.val_set is None:
		print('No validation set is provided. Will output model at the last iteration.')
		val_batch = None
	else:
		val_batch, _, _ = read_data(filename=args.val_set, dim=args.dim, label_enum=label_enum)

	num_data = full_batch[0].shape[0]
	
	config = ConfigClass(args, num_data, num_cls)

	if isinstance(config.seed, int):
		tf.random.set_seed(config.seed)
		np.random.seed(config.seed)

	if config.net in ('CNN_4layers', 'CNN_7layers', 'VGG11', 'VGG13', 'VGG16','VGG19'):
		model = CNN_model(config.net, config.dim, config.num_cls)
	else:
		raise ValueError('Unrecognized training model')

	if config.loss == 'MSELoss':
		loss = lambda y_true, y_pred: tf.square(y_true - y_pred)
	else:
		loss = lambda y_true, y_pred: tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true)

	full_batch[0], mean_tr = normalize_and_reshape(full_batch[0], dim=config.dim, mean_tr=None)
	if val_batch is not None:
		val_batch[0], _ = normalize_and_reshape(val_batch[0], dim=config.dim, mean_tr=mean_tr)

	param = model.trainable_weights

	# TODO: check what use_resource in tf1 means
	# TODO: check if the following variables are used, saver saves mean_param
	mean_param = tf.Variable(name='mean_tr', initial_value=mean_tr, trainable=False, 
				validate_shape=True)
	label_enum_var=tf.Variable(name='label_enum', initial_value=label_enum, trainable=False,
				validate_shape=True)
	
	if config.optim in ('SGD', 'Adam'):
		gradient_trainer(
			config, loss, model, full_batch, val_batch, test_network=None)
	elif config.optim == 'NewtonCG':
		newton_trainer(
			config, loss, model, full_batch, val_batch, test_network=None)
Code example #9
    def read_data(self, file_name, **kwargs):
        """
        Read data from file_name and store it on this object.
        """
        step_len = kwargs.get("step_len", 100)
        split_point = kwargs.get("split_point", None)
        snr = kwargs.get("snr", None)
        norm = kwargs.get("norm", False)
        normal, fault, n_res, f_res = read_data(file_name, step_len,
                                                split_point, snr, norm)
        list_fault, list_parameters = parse_filename(file_name)

        self.step = step_len
        self.fe = len(normal[0])

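        # mode is a binary fault-signature vector (all zeros = normal data);
        # para stores the matching fault parameters, with slots 5 and 6 used
        # when a fault carries a two-element parameter list.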
        mode = [0, 0, 0, 0, 0, 0]
        #para = [0, 0, 0, 0, 0, [0, 0]]
        para = [0, 0, 0, 0, 0, 0, 0]
        #normal data
        if norm:
            for i, r in zip(normal, n_res):
                self.map[tuple(mode)].append(len(self.input))
                self.input.append(i)
                self.res.append(r)
                self.mode.append(tuple(mode))
                self.para.append(tuple(para))
        else:
            for i in normal:
                self.map[tuple(mode)].append(len(self.input))
                self.input.append(i)
                self.mode.append(tuple(mode))
                self.para.append(tuple(para))

        #fault data
        #find faults and parameters
        for i, j in zip(list_fault, list_parameters):
            assert i in self.fault_type
            index = self.fault_type.index(i)
            mode[index] = 1
            if isinstance(j, list):
                para[5] = j[0]
                para[6] = j[1]
            else:
                para[index] = j
        if norm:
            for i, r in zip(fault, f_res):
                self.map[tuple(mode)].append(len(self.input))
                self.input.append(i)
                self.res.append(r)
                self.mode.append(tuple(mode))
                self.para.append(tuple(para))
        else:
            for i in fault:
                self.map[tuple(mode)].append(len(self.input))
                self.input.append(i)
                self.mode.append(tuple(mode))
                self.para.append(tuple(para))
Code example #10
File: data_processing.py Project: lokira/tools
def req_5(dict_G, dict_D, cmd, check_entry_list):

    data = uti.read_data(dict_D, cmd)
    data_G = uti.read_data(dict_G, cmd)
    entry = CheckEntry(cmd, CheckEntry.TABLE)

    if data is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd))
        str_D = list()
    else:
        str_D = re.split(r"\s+", data[0])

    if data_G is None:
        entry.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd))
        str_G = list()
    else:
        str_G = re.split(r"\s+", data_G[0])

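    # Flag each token: 1 = only in the golden output, 2 = only in the DUT output,
    # 3 = present in both (bit 0 marks golden, bit 1 marks DUT).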
    comp_res = {}
    for s in str_G:
        comp_res[s.strip()] = 1
    for s in str_D:
        if s.strip() in comp_res:
            comp_res[s.strip()] = 3
        else:
            comp_res[s.strip()] = 2

    t_data = list()
    #t_data.append([DUT_S, GOLDEN_S, "Result"])
    idx = 0
    for item in comp_res:
        t_data.append(["", "", ""])
        if comp_res.get(item) & 2:
            t_data[idx][0] = item
        if comp_res.get(item) & 1:
            t_data[idx][1] = item
        if comp_res.get(item) == 3:
            t_data[idx][2] = "OK"
        else:
            t_data[idx][2] = "NOK"
        idx += 1

    entry.load_t_data(t_data)
    check_entry_list.append(entry)
Code example #11
    def _on_file_drop(self, window, file_path):
        global G_DATA

        file_path = file_path.decode("utf-8")
        ext = os.path.splitext(file_path)[-1].lower()
        if ext == ".png":
            self.background_img = file_path
        else:
            G_DATA = utilities.read_data(file_path)
            print(G_DATA)
Code example #12
def prepare_dataset(config, train_type, columns):
    print('Reading data')
    raw_dataset = utilities.read_data([
        '{}/{}'.format(config['dir']['data'], data) for data in config['data'][train_type]
    ], columns)
    dataset = pd.concat(raw_dataset)

    if train_type == 'main':
        if 'specjvm' in config['name']:
            dataset = dataset.iloc[1000:2000]
        elif 'renaissance' in config['name']:
            pass
            # dataset = dataset[dataset['gc_time_clean'] < 100]
            # dataset = dataset.iloc[1500:]
            # dataset = dataset.iloc[2000:]
            # [dataset['gc_time_clean'] < 1500]
        elif 'dacapo' in config['name']:
            dataset = dataset.iloc[1000:2000]

    print()
    print('Data summaries')
    print(dataset.describe())

    print()
    print('Prepare dataset to predict')
    pred_dataset = (dataset.iloc[:, :-1], dataset.iloc[:, -1])
    
    print()
    print('Create cleaned dataset')
    clean_dataset = utilities.clean_data(dataset)

    print()
    print('Splitting dataset')
    splitted_dataset = train_test_split(
        dataset.iloc[:, :-1],
        dataset.iloc[:, -1],
        test_size=0.25, 
        random_state=42)

    print()
    print('Splitting cleaned dataset')
    splitted_cleaned_dataset = train_test_split(
        clean_dataset.iloc[:, :-1], 
        clean_dataset.iloc[:, -1],
        test_size=0.25, 
        random_state=42)

    return {
        'raw': raw_dataset,
        'dataset': dataset,
        'predict': pred_dataset,
        'cleaned': clean_dataset,
        'splitted_dataset': splitted_dataset,
        'splitted_cleaned_dataset': splitted_cleaned_dataset,
    }
Code example #13
    def PreparingData(self):
        if self._status == "l":
            my_shelve = shelve.open(self.__filename)
            return self.__filename
        elif self._status == "s":
            X_train, labels_train, list_ch_train = ut.read_data(
                data_path=self.__pathDS, split="train")  # train

            X_test, labels_test, list_ch_test = ut.read_data(
                data_path=self.__pathDS, split="test")  # test

            features_train = ut.read_Features(data_path=self.__pathDS,
                                              split="train")  # features train
            features_test = ut.read_Features(data_path=self.__pathDS,
                                             split="test")  # features test

            assert list_ch_train == list_ch_test, "Mismatch in channels!"

            # Normalize?
            X_train, X_test = ut.standardize(X_train, X_test)
            # X_tr, X_vld, lab_tr, lab_vld = train_test_split(X_train, labels_train,
            #                                                 stratify=labels_train, random_state=123)

            # One-hot encoding:
            y_tr = ut.one_hot(labels_train)
            # y_vld = ut.one_hot(lab_vld)
            y_test = ut.one_hot(labels_test)

            my_shelve = shelve.open(self.__filename, 'n')
            my_shelve['data_train'] = X_train
            # my_shelve['data_vld'] = X_vld
            my_shelve['data_test'] = X_test
            my_shelve['labels_train'] = y_tr
            my_shelve['labels_test'] = y_test
            # my_shelve['labels_vld'] = y_vld
            my_shelve['labels_test'] = y_test
            my_shelve['features_train'] = features_train
            my_shelve['features_test'] = features_test
            return self.__filename
Code example #14
def main(input_file, output, thresholds):

    df = read_data(input_file)
    df = add_theta(df)

    if not thresholds:
        thresholds = [0.0]

    bins, bin_centers, bin_widths = make_energy_bins(e_min=0.08 * u.TeV,
                                                     e_max=300 * u.TeV,
                                                     bins=15,
                                                     centering='log')

    ax = None

    for t in thresholds:

        e_true = df[df.gamma_score_mean > t].mc_energy.values
        e_reco = df[df.gamma_score_mean > t].energy_mean.values

        resolution = np.abs(e_reco - e_true) / e_true

        ax = plot_percentile(e_reco,
                             resolution,
                             t,
                             bins,
                             bin_centers,
                             bin_widths,
                             ax=ax)

    ax.plot([10**0, 10**2.47], [0.1, 0.1],
            '--',
            color='silver',
            label='SST sub-system')

    ax.set_xscale('log')
    ax.set_xlabel(
        r'$\mathrm{Reconstructed}\ \mathrm{Energy}\ /\  \mathrm{TeV}$')
    ax.set_ylabel(r'$\Delta E\ /\ E\ (68\% \ \mathrm{containment})$')
    ax.set_ylim([0, 0.5])
    ax.legend()
    plt.tight_layout()

    if output:
        plt.savefig(output)
    else:
        plt.show()
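
Note: plot_percentile, used here and in the next example, is assumed to bin the y-values in reconstructed energy and draw a given containment percentile per bin. A rough sketch of that per-bin computation with plain NumPy (the helper's real signature and behaviour are not shown in this listing; bins here are plain edge values, not astropy quantities):

import numpy as np

def containment_per_bin(e_reco, values, bins, percentile=68):
    # Percentile (e.g. 68% containment) of `values` in each reconstructed-energy
    # bin defined by the edge array `bins`; empty bins yield NaN.
    values = np.asarray(values)
    idx = np.digitize(e_reco, bins)
    return np.array([np.percentile(values[idx == i], percentile)
                     if np.any(idx == i) else np.nan
                     for i in range(1, len(bins))])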
Code example #15
def main(input_file, output, thresholds):

    df = read_data(input_file)
    df = add_theta(df)

    if not thresholds:
        thresholds = [0.0]

    bins, bin_centers, bin_widths = make_energy_bins(e_min=0.08 * u.TeV,
                                                     e_max=300 * u.TeV,
                                                     bins=15,
                                                     centering='log')

    ax = None

    for t in thresholds:
        x = df[df.gamma_score_mean > t].energy_mean.values
        y = df[df.gamma_score_mean > t].theta

        ax = plot_percentile(x, y, t, bins, bin_centers, bin_widths, ax=ax)

    ref = np.loadtxt('references/South-SST-AngRes.txt')
    plt.plot(10**ref[:, 0],
             ref[:, 1],
             '--',
             label='SST sub-system',
             color='silver')

    ax.set_xscale('log')
    ax.set_xlim([0.5, 300])
    ax.set_ylim([0, 0.5])
    ax.set_ylabel('Angular Resolution / deg')
    ax.set_xlabel(
        r'$\mathrm{Reconstructed}\ \mathrm{Energy}\ /\  \mathrm{TeV}$')
    ax.legend()
    plt.tight_layout()

    if output:
        plt.savefig(output)
    else:
        plt.show()
Code example #16
File: ScalarGaussian.py Project: CeasarSS/books
def main(argv=None):
    '''

    '''
    import numpy as np
    from utilities import read_data, SubPlot, axis
    from os.path import join

    if sys.version_info >= (3,0):
       print('%s needs matplotlib.  However, no matplotlib for python %s'%(
           sys.argv[0],sys.version_info,))
       return -1
    import matplotlib as mpl
    global DEBUG
    if DEBUG:
        mpl.rcParams['text.usetex'] = False
    else:
        mpl.use('PDF')
    import matplotlib.pyplot as plt

    if argv is None:                    # Usual case
        argv = sys.argv[1:]
    sim_file, fig_dir = argv

    params = {'axes.labelsize': 18,     # Plotting parameters for latex
              'text.fontsize': 15,
              'legend.fontsize': 15,
              'text.usetex': True,
              'font.family':'serif',
              'font.serif':'Computer Modern Roman',
              'xtick.labelsize': 15,
              'ytick.labelsize': 15}
    mpl.rcParams.update(params)

    data = read_data(sim_file)
    X = axis(data=data[0], magnitude=False, label=r'$t$',
             ticks=np.arange(0, 100.1, 25))

    def _plot(Y):
        fig = plt.figure(figsize=(3.5,2.5))
        ax = SubPlot(fig,(1,1,1),X,Y, color='b')
        ax.set_ylim(-0.02, 1.02)
        fig.subplots_adjust(bottom=0.15) # Make more space for label
        fig.subplots_adjust(left=.15, bottom=.18)
        return (ax, fig)

    ax, fig_b = _plot(axis(data=data[1], magnitude=False, label=r'$S(t)$',
                         ticks=np.arange(0, 1.1, 1)))
    ax, fig_d = _plot(axis(data=data[3], magnitude=False, label=r'$S(t)$',
                           ticks=np.arange(0, 1.1, 1)))

    ax, fig_c = _plot(axis(data=data[2], magnitude=False, label=r'$y(t)$',
                         ticks=np.arange(-4, 4.1, 4)))
    ax.set_ylim(-5, 5)
    fig_c.subplots_adjust(left=.2)
    if DEBUG:
        plt.show()
    else:
        fig_b.savefig(join(fig_dir, 'SGO_b.pdf'))
        fig_c.savefig(join(fig_dir, 'SGO_c.pdf'))
        fig_d.savefig(join(fig_dir, 'SGO_d.pdf'))
    return 0
Code example #17
File: ScalarGaussian.py Project: EJHortala/books-2
def main(argv=None):
    '''

    '''
    import numpy as np
    from utilities import read_data, SubPlot, axis
    from os.path import join

    if sys.version_info >= (3, 0):
        print('%s needs matplotlib.  However, no matplotlib for python %s' % (
            sys.argv[0],
            sys.version_info,
        ))
        return -1
    import matplotlib as mpl
    global DEBUG
    if DEBUG:
        mpl.rcParams['text.usetex'] = False
    else:
        mpl.use('PDF')
    import matplotlib.pyplot as plt

    if argv is None:  # Usual case
        argv = sys.argv[1:]
    sim_file, fig_dir = argv

    params = {
        'axes.labelsize': 18,  # Plotting parameters for latex
        'text.fontsize': 15,
        'legend.fontsize': 15,
        'text.usetex': True,
        'font.family': 'serif',
        'font.serif': 'Computer Modern Roman',
        'xtick.labelsize': 15,
        'ytick.labelsize': 15
    }
    mpl.rcParams.update(params)

    data = read_data(sim_file)
    X = axis(data=data[0],
             magnitude=False,
             label=r'$t$',
             ticks=np.arange(0, 100.1, 25))

    def _plot(Y):
        fig = plt.figure(figsize=(3.5, 2.5))
        ax = SubPlot(fig, (1, 1, 1), X, Y, color='b')
        ax.set_ylim(-0.02, 1.02)
        fig.subplots_adjust(bottom=0.15)  # Make more space for label
        fig.subplots_adjust(left=.15, bottom=.18)
        return (ax, fig)

    ax, fig_b = _plot(
        axis(data=data[1],
             magnitude=False,
             label=r'$S(t)$',
             ticks=np.arange(0, 1.1, 1)))
    ax, fig_d = _plot(
        axis(data=data[3],
             magnitude=False,
             label=r'$S(t)$',
             ticks=np.arange(0, 1.1, 1)))

    ax, fig_c = _plot(
        axis(data=data[2],
             magnitude=False,
             label=r'$y(t)$',
             ticks=np.arange(-4, 4.1, 4)))
    ax.set_ylim(-5, 5)
    fig_c.subplots_adjust(left=.2)
    if DEBUG:
        plt.show()
    else:
        fig_b.savefig(join(fig_dir, 'SGO_b.pdf'))
        fig_c.savefig(join(fig_dir, 'SGO_c.pdf'))
        fig_d.savefig(join(fig_dir, 'SGO_d.pdf'))
    return 0
Code example #18
File: main.py Project: rickqiu/msvm
import sys
import math
import time

import numpy as np
np.set_printoptions(precision=2, linewidth=1000)

import msvm
from msvm_kernels import linear, polynomial, rbf, sigmoid

from utilities import read_data, classification_accuracy, tune

if __name__ == '__main__':
    with open(sys.argv[1]) as f:
        X, y = read_data(f)

    N, d = X.shape

    train_N = int(0.8 * N)

    # split the data into training and testing sets
    sel_idx = np.random.choice(np.arange(N), train_N, replace=False)
    selection = np.full((N, ), False, dtype=bool)
    selection[sel_idx] = True

    train_X = X[selection, :]
    train_y = y[selection]

    test_X = X[np.invert(selection), :]
    test_y = y[np.invert(selection)]
Code example #19
Sconv = np.array([1,1,1,1]) # strides of each convolution layer

num_trials = 10
he_init = True
if(he_init):
    from build_234layer_1dconv_graph_he import build_234layer_1dconv_graph
else:
    from build_234layer_1dconv_graph_rnd import build_234layer_1dconv_graph


par = {'batch_size': batch_size, 'seq_len': seq_len, 'lrn_rate': lrn_rate,
       'epochs': epochs, 'krnl_sz': krnl_sz, 'krnl_sz_Bsg': krnl_sz_Bsg,
       'L': L, 'K': K, 'n_classes': n_classes, 'n_channels': n_channels,
       'n_outchannel': N, 'Spool': Spool, 'Sconv': Sconv,
       'num_trials': num_trials, 'act_func': act_func}
#%% Prepare data

X_train, labels_train, list_ch_train = read_data(data_path="data/", split="train") # train
X_test, labels_test, list_ch_test = read_data(data_path="data/", split="test") # test

assert list_ch_train == list_ch_test, "Mismatch in channels!"
# Normalize?
X_train, X_test = standardize(X_train, X_test)

# Train/Validation Split
X_tr, X_vld, lab_tr, lab_vld = train_test_split(X_train, labels_train,
                                                stratify = labels_train, random_state = 123)

# One-hot encoding:
y_tr = one_hot(lab_tr)
y_vld = one_hot(lab_vld)
y_test = one_hot(labels_test)
Code example #20
File: train.py Project: sharadgupta27/simpleNN
def main():

    full_batch, num_cls, label_enum = read_data(filename=args.train_set,
                                                dim=args.dim)

    if args.val_set is None:
        print(
            'No validation set is provided. Will output model at the last iteration.'
        )
        val_batch = None
    else:
        val_batch, _, _ = read_data(filename=args.val_set,
                                    dim=args.dim,
                                    label_enum=label_enum)

    num_data = full_batch[0].shape[0]

    config = ConfigClass(args, num_data, num_cls)

    if isinstance(config.seed, int):
        tf.compat.v1.random.set_random_seed(config.seed)
        np.random.seed(config.seed)

    if config.net in ('CNN_4layers', 'CNN_7layers', 'VGG11', 'VGG13', 'VGG16',
                      'VGG19'):
        x, y, outputs = CNN(config.net, num_cls, config.dim)
        test_network = None
    else:
        raise ValueError('Unrecognized training model')

    if config.loss == 'MSELoss':
        loss = tf.reduce_sum(input_tensor=tf.pow(outputs - y, 2))
    else:
        loss = tf.reduce_sum(
            input_tensor=tf.nn.softmax_cross_entropy_with_logits(
                logits=outputs, labels=y))

    network = (x, y, loss, outputs)

    sess_config = tf.compat.v1.ConfigProto()
    sess_config.gpu_options.allow_growth = True

    with tf.compat.v1.Session(config=sess_config) as sess:

        full_batch[0], mean_tr = normalize_and_reshape(full_batch[0],
                                                       dim=config.dim,
                                                       mean_tr=None)
        if val_batch is not None:
            val_batch[0], _ = normalize_and_reshape(val_batch[0],
                                                    dim=config.dim,
                                                    mean_tr=mean_tr)

        param = tf.compat.v1.trainable_variables()

        mean_param = tf.compat.v1.get_variable(name='mean_tr',
                                               initializer=mean_tr,
                                               trainable=False,
                                               validate_shape=True,
                                               use_resource=False)
        label_enum_var = tf.compat.v1.get_variable(name='label_enum',
                                                   initializer=label_enum,
                                                   trainable=False,
                                                   validate_shape=True,
                                                   use_resource=False)
        saver = tf.compat.v1.train.Saver(var_list=param + [mean_param])

        if config.optim in ('SGD', 'Adam'):
            gradient_trainer(config, sess, network, full_batch, val_batch,
                             saver, test_network)
        elif config.optim == 'NewtonCG':
            newton_trainer(config,
                           sess,
                           network,
                           full_batch,
                           val_batch,
                           saver,
                           test_network=test_network)
Code example #21
            Y_train.append(ground_truth_train[date][x, y])
    X_train += X_train_0_label[date]
    Y_train += Y_train_0_label[date]
    X_train = np.array(X_train)
    Y_train = np.array(Y_train)

    return X_train, Y_train


dates = ['2019_07_25', '2019_09_20', '2019_10_11']
N = 200
repeats = 5
version = "fraction"

print("1. Read data")
bands_train, bands_test, ground_truth_train, ground_truth_test, mask_train, mask_test = read_data(dates=dates)

print("2. Create test samples")
X_test, Y_test = create_test_samples(
    dates=dates, bands_test=bands_test, ground_truth_test=ground_truth_test, mask_test=mask_test)

print("3. Create train samples with 0 label")
X_train_0_label, Y_train_0_label = create_train_samples_0_label(
    dates=dates, bands_train=bands_train, ground_truth_train=ground_truth_train, mask_train=mask_train)

print("4. Create output files")
if version == "linear":
    results_file = open(f"Results_f1_score_vs_no_samples_{version}.txt", "a")
    results_file.write("date\tn\trepeat\tF1_score_test\tcoefs[0]\tcoefs[1]\tcoefs[2]\tintercept\tthreshold\n")
elif version == "fraction":
    results_file = open(f"Results_f1_score_vs_no_samples_{version}.txt", "a")
Code example #22
def single_model():
    train = read_data('raw_data/train.csv')
    test = read_data('raw_data/test.csv')

    df_tr = pd.read_csv(agg_loc + agg_name)
    train = pd.merge(train, df_tr, on='card_id', how='left').fillna(0)
    test = pd.merge(test, df_tr, on='card_id', how='left').fillna(0)

    del df_tr

    train = fe.combine_categs(train)
    test = fe.combine_categs(test)

    kfolds = KFold(5, shuffle=True, random_state=42)

    results = {}

    for_second_level = pd.DataFrame({'target': train['target']})

    for model in model_list.keys():
        to_train = model_list.get(model)

        for selection in sel_list:
            to_select = sel_list.get(selection)

            print(f'{model}_{selection}')

            df_train = train.copy()
            df_test = test.copy()

            target = df_train['target']
            id_to_sub = df_test['card_id']
            del df_train['target']
            del df_train['card_id']
            del df_test['card_id']

            df_train, df_test = to_select(df_train, df_test)

            predictions, cv_score, feat_imp, oof = to_train(
                df_train, df_test, target, kfolds)

            results[model + '_' + selection] = cv_score

            for_second_level[model + '_' + selection] = oof

            sub_df = pd.DataFrame({"card_id": id_to_sub.values})
            sub_df["target"] = predictions
            sub_df.to_csv(save_loc + model + '_' + selection + '.csv',
                          index=False)
            feat_imp.to_csv(save_loc + model + '_' + selection +
                            "_featimp.csv",
                            index=False)

            for_second_level.to_csv(save_loc + 'oof_predictions.csv')

            print(f'{model}_{selection}:\t {cv_score}')
            print('_' * 40)
            print('_' * 40)
            print('\n')

    final = pd.DataFrame.from_dict(results,
                                   orient='index',
                                   columns=['CV_score'])
    final.to_csv(save_loc + 'single_cvscores.csv')
    for_second_level.to_csv(save_loc + 'oof_predictions.csv')
Code example #23
import numpy as np
import utilities as ut

X_train, labels_train, list_ch_train = ut.read_data(
    data_path="./datasets/data", split="train")  # train
Code example #24
def mainGJ(filename, **kwargs):  # TODO: FIX THE KWARGS !!!
	""" Main execution using GaussJordan elimination"""

	DEBUG = get_or_default(kwargs, 'DEBUG', False)
	data = read_data(filename, ' ')
	c = Counter(data['data'])
	child_name = "C1"
	child_idx = data['header'].index(child_name)
	num_columns = len(data['header'])
	new_counter = match_by_column(c, child_idx)

	binary_data = binarize(new_counter)

	items = sorted(binary_data.items(), key=lambda x: x[1][2], reverse=True)

	def leak_exponent(k):
		#return (-sum(k)+1,)
		return (1,)
		#return ()

	log_base = 2

	A_vect = [k + leak_exponent(k) for k, v in items if v[0] not in(1.0, 0.0)]
	A = np.array(A_vect) * Fraction(1, 1)

	b_vect = [v[0] for k, v in items if v[0] not in (1.0, 0.0)]
	b_vect = [log(1.0 - b, log_base) for b in b_vect]

	b_cnt = [(v[1], v[2]) for k, v in items if v[0] not in (1.0, 0.0)]

	if DEBUG:
		for i in xrange(A.shape[0]):
			print "b%d"%i, A_vect[i], b_vect[i], b_cnt[i]
	
	b = np.array(sp.symbols('b0:%d' % A.shape[0]))
	subs = dict(zip(b,b_vect))
	subs_cnt = dict(zip(b,b_cnt))
	
	A2, b2 = GaussJordanElimination(A, b)
	b3 = [1.0 - float(log_base**b.evalf(subs=subs)) for b in b2]

	subs_str = tuple([(str(k), v) for k, v in subs.iteritems()]) + tuple([("r%d"%i, b2[i]) for i in range(len(b2)) ])
	subs_str = dict(subs_str)

	if DEBUG:
		print augment([A2, b2, b3])

	nonzero_i = (i for i in range(A2.shape[0]) if any(j!=0 for j in A2[i]))
	zero_i = (i for i in range(A2.shape[0]) if all(j==0 for j in A2[i]))
	nonzero_v = list((A2[i], b2[i]) for i in nonzero_i)
	zero_v = list((A2[i], b2[i]) for i in zero_i)

	def product(l):
		return reduce(lambda x, y: x * y, l)

	def _min_fitness(b_val, b_subs_cnt_orig):
		b_subs_cnt = dict((k, v[1]) for k, v in b_subs_cnt_orig.iteritems())
		total = sum(b_subs_cnt.values())
		coeff = [(b.args if b.args else (1, b)) for b in (b_val.args if not type(b_val)==sp.Symbol else [b_val])]
		min_c = min(b_subs_cnt[c[1]] for c in coeff)
		return min_c / float(total)

	def _avg_fitness(b_val, b_subs_cnt_orig):
		b_subs_cnt = dict((k, v[1]) for k, v in b_subs_cnt_orig.iteritems())
		total = sum(b_subs_cnt.values())
		coeff = [(b.args if b.args else (1, b)) for b in (b_val.args if not type(b_val) == sp.Symbol else [b_val])]
		#print coeff
		return sum(b_subs_cnt[s[1]] / float(total) for s in coeff)/ float(sum(abs(s) for s, _ in coeff))
		#return sum(abs(s[0])*(b_subs_cnt[s[1]]/float(total)) for s in coeff) / sum(b_subs_cnt[s[1]]/float(total) for s in coeff)
		#return 1

	def _max_count_fitness(b_val, b_subs_cnt_orig):
		b_subs_cnt = dict( (k,v[1]) for k, v in b_subs_cnt_orig.iteritems())
		total = sum(b_subs_cnt.values())
		coeff = [(b.args if b.args else (1, b)) for b in (b_val.args if not type(b_val)==sp.Symbol else [b_val])]
		return sum(b_subs_cnt[s[1]]/abs(s[0]) for s in coeff) / float(total)
	
	def _pu(x,n,c):
		n = float(n)
		x = float(x)
		c = float(c)
		sqr = sqrt(((x/n)*(1.0-x/n))/n)
		return c*sqr
		#return x/n-Ualph*sqr,x/n+Ualph*sqr

	def _pu_fitness(b_val, b_subs_cnt):
		#total = sum(b_subs_cnt.values())
		coeff = [(b.args if b.args else (1, b)) for b in (b_val.args if not type(b_val)==sp.Symbol else [b_val])]
		#return 1.0 - max(b_subs_cnt[b][0]/float(b_subs_cnt[b][1]) - _pu(b_subs_cnt[b][0], b_subs_cnt[b][1], 1.65)[0] for c, b in coeff)
		#return 1.0 - max(b_subs_cnt[b][0]/float(b_subs_cnt[b][1]) - abs(c)*_pu(b_subs_cnt[b][0], b_subs_cnt[b][1], 1.65) for c, b in coeff)
		return 1.0 - max(abs(c)*_pu(b_subs_cnt[b][0], b_subs_cnt[b][1], 1.65) for c, b in coeff)
		
	#fitness = _min_fitness
	#fitness = _avg_fitness
	fitness = _pu_fitness

	#BELOW: poor fitness!
	#fitness = _max_count_fitness

	solutions = []
	for i in nonzero_v:
		for zv in ([(0,0)] + zero_v):
			for coeff in [2, 1,-1, -2]:
				expr = (i[1] + coeff*zv[1])
				fit = fitness(expr, subs_cnt)
				#print i[0], " [",coeff,"]", zv[0], "expr:",expr, "value:",float(1.0 - log_base**expr.evalf(subs=subs)), "fitness:", fit
				solutions.append((i[0],'V' if type(zv[0])!=int else '0', coeff, zv[1],"EXPR:", expr, float(1.0 - log_base ** expr.evalf(subs=subs)), fit))
				if type(zv[0]) == int:
					break

	GJElim_fit_distribution = []
	num_best_solutions = 5
	for i in range(num_columns):
		solutions_filtered = [s for s in sorted(solutions, key= lambda x: x[-1], reverse=True) if s[0][i] == 1][:num_best_solutions]
		GJElim_fit_distribution.append(solutions_filtered[0][-2])
		suma = sum(s[-1]*s[-2] for s in solutions_filtered)
		if DEBUG:
			for s in solutions_filtered:
				print s
			print suma / sum(s[-1] for s in solutions_filtered)
			print ""

	if DEBUG:
		print augment([A2, b2, b3])

	GJElim_distribution = []
	for i in range(num_columns):
		for j in range(A2.shape[0]):
			if A2[j][i] == 1:
				GJElim_distribution.append(b3[j])
				break
	GJElim_distribution = [(d if d>0 else 10e-5) for d in GJElim_distribution]
	GJElim_fit_distribution = [(d if d>0 else 10e-5) for d in GJElim_fit_distribution]

	outs = []
	labels = []
	for h in data['header']:
		labels.append(["True", "False"])
		#FIXME: data['domain'] does not keep states sorted so states are messed up
		#labels.append(data['domain'][h])
		
	for solution in [GJElim_distribution, GJElim_fit_distribution]:
		leak = solution[-1]
		params = reduce( lambda x,y: x+y, [[a,0] for a in solution[:-1]]) + [leak,]
		parent_dims = [2]*(num_columns-1)
		GJ_CPT = CPT([params, [1.0 - p for p in params]], parent_dims, CPT.TYPE_NOISY_MAX, data['header'], labels)
		outs.append(GJ_CPT)

	return outs
Code example #25
def starting_counts(sequences):
    tag_starts = {}
    for seq in sequences:
        if seq[0] in tag_starts:
            tag_starts[seq[0]] += 1
        else:
            tag_starts[seq[0]] = 1
    return tag_starts


def ending_counts(sequences):
    tag_ends = {}
    for seq in sequences:
        if seq[-1] in tag_ends:
            tag_ends[seq[-1]] += 1
        else:
            tag_ends[seq[-1]] = 1
    return tag_ends


tagfile = "tags-universal.txt"
datafile = "brown-universal.txt"

tagset = read_tags(tagfile)
sentences = read_data(datafile)
keys = tuple(sentences.keys())
wordset = frozenset(chain(*[s.words for s in sentences.values()]))
word_sequences = tuple([sentences[k].words for k in keys])
tag_sequences = tuple([sentences[k].tags for k in keys])
N = sum(1 for _ in chain(*(s.words for s in sentences.values())))
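
Note: the two counting helpers at the top of this example are equivalent to building a collections.Counter over the first and last tag of each sequence; a shorter alternative (not part of the original script):

from collections import Counter

def starting_counts(sequences):
    # How often each tag opens a sequence.
    return Counter(seq[0] for seq in sequences)

def ending_counts(sequences):
    # How often each tag closes a sequence.
    return Counter(seq[-1] for seq in sequences)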
Code example #26
File: data_processing.py Project: lokira/tools
def plot_mag_phase(dict_G,
                   dict_D,
                   cmd_fr,
                   cmd_im,
                   cmd_re,
                   check_entry_list,
                   style=None):
    """
    Read Data
    """
    data10_fr = uti.read_data(dict_D, cmd_fr)
    data10_fr_G = uti.read_data(dict_G, cmd_fr)
    data10_re = uti.read_data(dict_D, cmd_re)
    data10_re_G = uti.read_data(dict_G, cmd_re)
    data10_im = uti.read_data(dict_D, cmd_im)
    data10_im_G = uti.read_data(dict_G, cmd_im)

    title_mag = cmd_im.replace('im', 'mag')
    entry_mag = CheckEntry(title_mag, CheckEntry.XY)
    title_phase = cmd_im.replace('im', 'phase')
    entry_phase = CheckEntry(title_phase, CheckEntry.XY)
    """
    Process Golden Data
    """
    if data10_fr_G is None:
        entry_mag.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_fr))
        entry_phase.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_fr))
    if data10_im_G is None:
        entry_mag.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_im))
        entry_phase.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_im))
    if data10_re_G is None:
        entry_mag.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_re))
        entry_phase.add_err_msg(CMD_NOT_FOUND_S % (GOLDEN_S, cmd_re))

    if data10_fr_G and data10_im_G and data10_re_G:
        if not (uti.is_same_len(data10_fr_G, data10_re_G)
                and uti.is_same_len(data10_fr_G, data10_im_G)):
            entry_mag.add_err_msg(XY_NOT_MATCH_S % (GOLDEN_S, title_mag))
            entry_phase.add_err_msg(XY_NOT_MATCH_S % (GOLDEN_S, title_phase))
        else:
            mag_G, phase_G = uti.get_mag_angle(data10_re_G, data10_im_G)
            entry_mag.load_data_G([data10_fr_G, mag_G])
            entry_phase.load_data_G([data10_fr_G, phase_G])
    """
    Process DUT Data
    """
    if data10_fr is None:
        entry_mag.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_fr))
        entry_phase.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_fr))
    if data10_im is None:
        entry_mag.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_im))
        entry_phase.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_im))
    if data10_re is None:
        entry_mag.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_re))
        entry_phase.add_err_msg(CMD_NOT_FOUND_S % (DUT_S, cmd_re))

    if data10_fr and data10_im and data10_re:
        if not (uti.is_same_len(data10_fr, data10_re)
                and uti.is_same_len(data10_fr, data10_im)):
            entry_mag.add_err_msg(XY_NOT_MATCH_S % (DUT_S, title_mag))
            entry_phase.add_err_msg(XY_NOT_MATCH_S % (DUT_S, title_phase))
        else:
            mag, phase = uti.get_mag_angle(data10_re, data10_im)
            entry_mag.load_data([data10_fr, mag])
            entry_phase.load_data([data10_fr, phase])

    if uti.is_not_same_len_not_empty(entry_mag.get_data(),
                                     entry_mag.get_data_G()):
        entry_mag.add_err_msg(GD_NOT_MATCH_S)
        entry_phase.add_err_msg(GD_NOT_MATCH_S)

    entry_mag.xlabel = 'freq'
    entry_mag.ylabel = 'mag'

    entry_phase.xlabel = 'freq'
    entry_phase.ylabel = 'phase'

    check_entry_list.append(entry_mag)
    check_entry_list.append(entry_phase)
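
Note: uti.get_mag_angle is assumed to turn the real/imaginary samples into magnitude and phase arrays. A plausible NumPy sketch (the function name is taken from the example above, but the implementation and the degree convention are assumptions):

import numpy as np

def get_mag_angle(re, im):
    # Combine real and imaginary parts and return (magnitude, phase in degrees).
    z = np.asarray(re, dtype=float) + 1j * np.asarray(im, dtype=float)
    return np.abs(z).tolist(), np.degrees(np.angle(z)).tolist()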
Code example #27
File: predict.py Project: Sinacam/simpleNN
	sess_config = tf.compat.v1.ConfigProto()
	sess_config.gpu_options.allow_growth = True

	with tf.compat.v1.Session(config=sess_config) as sess:
		graph_address = args.model_file + '.meta'
		imported_graph = tf.compat.v1.train.import_meta_graph(graph_address)
		imported_graph.restore(sess, args.model_file)
		mean_param = [v for v in tf.compat.v1.global_variables() if 'mean_tr:0' in v.name][0]
		label_enum_var = [v for v in tf.compat.v1.global_variables() if 'label_enum:0' in v.name][0]
		
		sess.run(tf.compat.v1.variables_initializer([mean_param, label_enum_var]))
		mean_tr = sess.run(mean_param)
		label_enum = sess.run(label_enum_var)

		test_batch, num_cls, _ = read_data(args.test_set, dim=args.dim, label_enum=label_enum)
		test_batch[0], _ = normalize_and_reshape(test_batch[0], dim=args.dim, mean_tr=mean_tr)

		x = tf.compat.v1.get_default_graph().get_tensor_by_name('main_params/input_of_net:0')
		y = tf.compat.v1.get_default_graph().get_tensor_by_name('main_params/labels:0')
		outputs = tf.compat.v1.get_default_graph().get_tensor_by_name('output_of_net:0')

		if args.loss == 'MSELoss':
			loss = tf.reduce_sum(input_tensor=tf.pow(outputs-y, 2))
		else:
			loss = tf.reduce_sum(input_tensor=tf.nn.softmax_cross_entropy_with_logits(logits=outputs, labels=tf.stop_gradient(y)))
		
		network = (x, y, loss, outputs)

		avg_loss, avg_acc, results = predict(sess, network, test_batch, args.bsize)