Example #1
File: main.py Project: hankchau/cnn
def run_contrast_data():
    '''Plots some csv data sample files.'''
    outpath = 'example/sample_output/run_contrast_data/'
    if os.path.isdir(outpath):
        shutil.rmtree(outpath)
    os.makedirs(outpath, exist_ok=True)

    # check data
    fpath1 = 'csv_data/Basement/2020-11-21/172102_000000.csv'
    fpath2 = 'csv_data/Basement/2020-11-24/090936_111111.csv'

    mat1 = data.read_csv(fpath1)
    mat2 = data.read_csv(fpath2)

    # compare '111111' and '000000'
    titles = ['111111', '000000']
    ill.render(mat2, os.path.join(outpath, 'L_1.png'))
    ill.render(mat1, os.path.join(outpath, 'L_0.png'))
    ill.contrast([mat2, mat1],
                 os.path.join(outpath, 'gray_0_1.png'),
                 'Grayscale before denoising',
                 titles,
                 cmap='gray')
    ill.contrast([mat2, mat1],
                 os.path.join(outpath, 'rainbow_0_1.png'),
                 'Heatmap before denoising',
                 titles,
                 cmap='rainbow')

    # denoise by subtraction
    denoise = mat2 - mat1
    titles = ['denoise', 'abs(denoise)']
    ill.render(denoise, os.path.join(outpath, 'L_subtract.png'))
    ill.contrast([denoise, np.abs(denoise)],
                 os.path.join(outpath, 'gray_subtract.png'),
                 'Normalization with Subtraction',
                 titles,
                 cmap='gray')
    ill.contrast([denoise, np.abs(denoise)],
                 os.path.join(outpath, 'rainbow_subtract.png'),
                 'Normalization with Subtraction',
                 titles,
                 cmap='rainbow')

    # denoise by cropping image
    c_mat2 = mat2[27:, :]
    titles = ['before cropping', 'after cropping']
    ill.render(c_mat2, os.path.join(outpath, 'crop_L_1.png'))
    ill.contrast([mat2, c_mat2],
                 os.path.join(outpath, 'gray_crop.png'),
                 'Normalization after cropping',
                 titles,
                 cmap='gray')
    ill.contrast([mat2, c_mat2],
                 os.path.join(outpath, 'rainbow_crop.png'),
                 'Normalization after cropping',
                 titles,
                 cmap='rainbow')
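The data and ill modules above are project-specific (hankchau/cnn). For readers without them, here is a minimal stand-in for the side-by-side contrast call, using plain matplotlib and synthetic matrices; contrast_sketch is a hypothetical helper, not the project's API.

import matplotlib.pyplot as plt
import numpy as np

def contrast_sketch(mats, outpath, suptitle, titles, cmap='gray'):
    # Render the given matrices side by side and save the figure.
    fig, axes = plt.subplots(1, len(mats), figsize=(8, 4))
    fig.suptitle(suptitle)
    for ax, mat, title in zip(axes, mats, titles):
        ax.imshow(mat, cmap=cmap)
        ax.set_title(title)
    fig.savefig(outpath)
    plt.close(fig)

mat1, mat2 = np.random.rand(64, 64), np.random.rand(64, 64)   # stand-ins for the CSV frames
contrast_sketch([mat2 - mat1, np.abs(mat2 - mat1)],
                'gray_subtract.png',
                'Normalization with Subtraction',
                ['denoise', 'abs(denoise)'])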
Example #2
def train(train_csv):
    data = read_csv(train_csv)
    dict_data = preprocess(data)
    X, Y = convert_to_input(dict_data)
    model = model_generate()
    model.fit(X, Y, epochs=10)
    return model
Example #3
    def __init__(self, g, csv_path):
        SituationBase.__init__(self, g)
        self.FRAME_RATE = 22
        self.log("Reading config %s" % csv_path)
        self.scenes = dict([(rec["Number"], rec) for rec in data.read_csv(csv_path, self.g.game_data)])

        self.map_pane = self.add_pane("MINIMAP", MapPane(self))

        self.set_current_scene("1")
Example #4
def read_train_test_frames():
    df = data.read_csv(index_col=8)

    # Dependent and independent features
    y_df = df[[data.DEPENDENT]]
    x_df = df[data.independents(df)]

    # Split into training and testing sets
    test_size = .20  # 10e6 observations on rank 3 justifies reduced test size
    split_kwargs = dict(test_size=test_size, random_state=_RANDOM_STATE)
    return train_test_split(x_df, y_df, **split_kwargs)
Example #5
def plot_heatmaps(csv_files, outpaths, shading='gouraud', cmap='rainbow', roi_boxes=True):
    range_depth = 12.8
    range_width = +sin(radians(60)) * range_depth

    x, y = data.get_transform_index()
    # matplotlib
    fig = plt.figure(figsize=(6,3))
    ax = fig.add_subplot(111)
    fig.suptitle('Range Azimuth Heatmap (-60\N{DEGREE SIGN}, 60\N{DEGREE SIGN})')

    for i in range(len(csv_files)):
        if outpaths[i] == '':
            continue
        ax.set_xlabel('Azimuth [m]')
        ax.set_ylabel('Range [m]')
        ax.set_xlim([-range_width - 0.5, range_width + 0.5])
        ax.set_ylim([0, range_depth + 0.5])
        # plot ROI boxes
        if roi_boxes:
            slots = [53, 54, 55, 56, 57, 58]
            for slot in slots:
                reg = data.data_index[str(slot)]
                pts = reg.get_corners()
                pts.append(pts[0])  # close the polygon
                x_coord = [p[0] for p in pts]
                y_coord = [p[1] for p in pts]
                ax.plot(x_coord, y_coord, 'black')

        mat = data.read_csv(csv_files[i])
        im = ax.pcolormesh(x, y, mat, cmap=cmap, shading=shading, vmin=0.0, vmax=500)
        cb = fig.colorbar(im)
        fig.tight_layout()
        fig.savefig(outpaths[i])
        cb.remove()
        plt.cla()
        gc.collect()
    plt.clf()
    plt.close(fig)
    gc.collect()
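A hedged usage sketch: csv_files and outpaths must have the same length, and an empty string in outpaths skips that frame. The CSV paths below are the sample files from Example #1; the output names are placeholders.

csv_files = ['csv_data/Basement/2020-11-21/172102_000000.csv',
             'csv_data/Basement/2020-11-24/090936_111111.csv']
outpaths = ['example/heatmap_000000.png', 'example/heatmap_111111.png']
plot_heatmaps(csv_files, outpaths, roi_boxes=False)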
Example #6
    def __init__(self, sit):
        utils.Pane.__init__(self, sit, 600, 30, 800, 230, (140, 180, 160))
        self.background = data.load_image("MiniMap.png")

        class Location(object):
            def __init__(self, rec):
                self.name = rec["Location"]
                self.x = int(rec["x"])
                self.y = int(rec["y"])

        if not MapPane.locations:
            MapPane.locations = {}
            for rec in data.read_csv("map_locations.csv", self.g.game_data):
                if rec["Location"]:
                    loc = Location(rec)
                    MapPane.locations[loc.name] = loc

        if not self.g.movement_path:
            self.move_to_location("Apartment")
        else:
            self.render()
Example #7
    for ex in examples:
        # Ignore NA-scored instances
        if ex[1] == "NA":
            continue

        if len(batch) >= batch_size:
            yield pack_samples(batch)
            batch = [ex]
        else:
            batch.append(ex)

    # Final batch
    if len(batch) > 0:
        yield pack_samples(batch)


if __name__ == "__main__":
    # Test KoBERT
    import data

    labeled = data.read_csv("data/sample.csv")
    b_gen = batch_samples(labeled, 8)

    model = BertSentimentPredictor()
    model.train()

    input_b = next(b_gen)[0]
    print(model(input_b))
    print(model(input_b))
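The snippet above starts mid-function: batch is never initialized and the signature is missing. A minimal sketch of the full generator, assuming the signature batch_samples(examples, batch_size) used in the __main__ block; pack_samples is the project's own helper.

def batch_samples(examples, batch_size):
    batch = []
    for ex in examples:
        if ex[1] == "NA":               # ignore NA-scored instances
            continue
        if len(batch) >= batch_size:
            yield pack_samples(batch)   # pack_samples: project helper
            batch = [ex]
        else:
            batch.append(ex)
    if batch:                           # final, possibly short batch
        yield pack_samples(batch)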
Example #8
        if len(batch) >= batch_size:
            yield pack_samples(batch)
            batch = [ex]
        else:
            batch.append(ex)

    # Final batch
    if len(batch) > 0:
        yield pack_samples(batch)


if __name__ == "__main__":
    # Test CBiLSTM
    import data

    labeled = data.read_csv("data/sample.csv")
    unlabeled = data.read_csv("data/thaad_relevant.csv")

    # First get the char/jamo vocabulary
    c2i = defaultdict(lambda: len(c2i))
    j2i = defaultdict(lambda: len(j2i))
    c2i["[PAD]"]
    j2i["[PAD]"]

    for i, ex in enumerate(labeled):
        print(f"Reading char & jamo vocabulary from labeled texts: {i}",
              end="\r")
        for c in ex[0]:
            c2i[c]
        for j in jamo.j2hcj(jamo.h2j(ex[0])):
            j2i[j]
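The defaultdict(lambda: len(c2i)) trick assigns each new character the next free integer id at its first lookup, so simply indexing c2i[c] grows the vocabulary as a side effect; the bare c2i["[PAD]"] line reserves id 0 for padding. A tiny self-contained illustration:

from collections import defaultdict

c2i = defaultdict(lambda: len(c2i))
c2i["[PAD]"]            # first lookup inserts '[PAD]' with id 0
for ch in "안녕":
    c2i[ch]             # each new character gets the next id
print(dict(c2i))        # {'[PAD]': 0, '안': 1, '녕': 2}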
Example #9
def test(test_csv):
    data = read_csv(test_csv)
    dict_data = preprocess_test(data)
    X, Y = convert_to_input_test(dict_data)
    return X, Y
Example #10
def validate_dataset(csv_path):
    data_v = read_csv(csv_path)
    dict_data = preprocess(data_v)
    X_valid, Y_valid = convert_to_input(dict_data)
    return (X_valid, Y_valid)
Example #11
File: show.py Project: rom1212/nns
#!/usr/bin/python3
import numpy as np
from operator import itemgetter
from day import norm_days, norm_day, get_high, get_low, get_open, get_close
from data import read_csv, read_tdx, match_all, read_dir
from draw import draw_box, plot_row, plot_target
import matplotlib.pyplot as plt
from matplotlib.figure import figaspect

if __name__ == "__main__":
    data, dates = read_csv('./csv1')
    exit()

    data, dates = read_tdx('./testdata/testdata_1.txt')
    factor = 1

    # data, dates = read_tdx('./sample1.txt')
    # data, dates = read_tdx('./600030.txt')
    
    # factor = 100

    print(len(data))
    print(len(dates))
    total_days = len(data)

    ldays = 5
    target = data[total_days - ldays:,:]

    data, dates = read_dir('./stock/', -1, read_tdx)

    # cands = data[:total_days - ldays,:]
Example #12
        y = np.zeros(28)
        for key in lbl:
            y[int(key)] = 1
        labels.append(y)

    return np.array(paths), np.array(labels)


if __name__ == '__main__':
    patience = 5
    earlyThreshold = 0.0005
    num_fold = 5
    mskf = MultilabelStratifiedKFold(n_splits=num_fold, random_state=0)

    fold = 0
    X, y, _ = read_csv()

    for train_index, val_index in mskf.split(X, y):

        X_val, y_val = X[val_index], y[val_index]
        X_train, y_train = X[train_index], y[train_index]

        print("starting fold: {}".format(fold))

        if not os.path.exists(config.submit):
            os.makedirs(config.submit)
        if not os.path.exists(config.weights + config.model_name + os.sep +
                              str(fold)):
            os.makedirs(config.weights + config.model_name + os.sep +
                        str(fold))
        if not os.path.exists(config.best_models):
Example #13
# coding: utf-8

import matplotlib.pyplot as plt
#%matplotlib inline
import numpy as np
import data as pd
import os
from sklearn import datasets, linear_model

print(os.getcwd())
data = pd.read_csv('ccpv.csv', encoding="utf-8", header=0)

print(data.head())
print(data.shape)
print(data.info())
print(data.ix[:, [0,1,2,3]])
x = data.ix[:, [0,1,2,3]]
x.head()
y=data[["PE"]]
print(y.head())

print(x.head())

from sklearn.cross_validation import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

from sklearn.linear_model import LinearRegression
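This snippet relies on APIs that have since been removed: DataFrame.ix is gone from pandas and sklearn.cross_validation was folded into sklearn.model_selection. A sketch of the same steps on current releases, using pandas directly and assuming the same ccpv.csv layout:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

data = pd.read_csv('ccpv.csv', encoding="utf-8", header=0)
x = data.iloc[:, [0, 1, 2, 3]]        # .iloc replaces the removed .ix indexer
y = data[["PE"]]
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)
model = LinearRegression().fit(x_train, y_train)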
Example #14
"""Exploratory data analysis"""
import datetime as dt
import time

import matplotlib.pyplot as plt
import pandas as pd
from pandas.tools.plotting import andrews_curves, autocorrelation_plot, \
    lag_plot, parallel_coordinates, scatter_matrix
from sklearn.decomposition import PCA, KernelPCA

import data

if __name__ == '__main__':
    df = data.read_csv()

    # Description matrix
    print('Description:\n{}'.format(df.describe()))

    # Number of samples for visualization and other compute bound steps
    nsamples = 500

    # Scatterplot
    # Take a random sample of data rather than visualize all data
    sample_df = df.sample(nsamples)
    scatter_kwds = dict(alpha=0.2, figsize=(15, 15))

    diagonals = ['hist', 'kde']
    for d in diagonals:
        plt.clf()  # Clear any existing figure
        axes = scatter_matrix(sample_df, diagonal=d, **scatter_kwds)
        scatter_matrix_fp_fmt = 'output/scatter_matrix_{}.png'
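pandas.tools.plotting no longer exists in current pandas releases; the same functions now live under pandas.plotting, so on a recent install the import above becomes:

from pandas.plotting import andrews_curves, autocorrelation_plot, \
    lag_plot, parallel_coordinates, scatter_matrix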
Example #15
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_arguments()

    # Setting shortcuts
    CLF_OR_REG = args.exp_mode == "clf"  # True is CLF, False is REG
    CUDA = args.cuda_device
    COTR_EPOCH = args.epoch
    BATCH_SIZE = args.batch_size
    N = BATCH_SIZE * 2
    U_SUB_SIZE = BATCH_SIZE * 10

    # Data generators
    labeled = data.read_csv(args.labeled_path)
    unlabeled = data.read_csv(args.unlabeled_path)

    # Char/jamo vocabularies
    if args.model_path is None:
        c2i = defaultdict(lambda: len(c2i))
        j2i = defaultdict(lambda: len(j2i))
        c2i["[PAD]"]
        j2i["[PAD]"]
    else:
        checkpoint = torch.load(args.model_path)
        c2i = checkpoint["c2i"]
        j2i = checkpoint["j2i"]

    # Labeled & unlabeled pool of data
    L = []
Example #16
from data import read_csv, build_sentences_labels, handle_uncommon_words, handle_unknown_words, build_sentences, handle_unknown_sentences
from probs import build_emission_map, build_transition_map


if __name__ == '__main__':
    X_train = read_csv("./data/dev_x.csv")
    X_labels = read_csv("./data/dev_y.csv")

    sentences, labels = build_sentences_labels(X_train, X_labels, k=2)
    sentences = handle_uncommon_words(sentences)

    #transition_map = build_transition_map(labels)
    #emission_map = build_emission_map(sentences, labels)
    vocab = set()
    for sentence in sentences:
        for word in sentence:
            vocab.add(word)

    not_found = []
    test_sentences = build_sentences(read_csv('./data/test_x.csv'), k=2)
    test_sentences = handle_unknown_sentences(test_sentences, vocab)
    for test_sentence in test_sentences:
        for test_word in test_sentence:
            if test_word not in vocab:
                not_found.append(test_word)

    with open('output.txt', 'w') as f:
        for word in not_found:
            f.write(f"{word}\n")
Example #17
def test(model_paths: Sequence[Text], test_data_paths: Sequence[Text],
         pretrained_model_name: Text, label_col: Text, n_rows: int,
         batch_size: int, verbose: bool):

    width = max(len(p) for p in model_paths + test_data_paths)
    headers = ["precision", "recall", "f1-score", "support"]
    header_fmt = f'{{:<{width}s}} ' + ' {:>9}' * 4
    row_fmt = f'{{:<{width}s}} ' + ' {:>9.3f}' * 3 + ' {:>9}'

    # load the tokenizer model
    tokenizer_for = transformers.AutoTokenizer.from_pretrained
    tokenizer = tokenizer_for(pretrained_model_name)

    # load the pre-trained transformer model
    model_for = transformers.TFAutoModel.from_pretrained
    transformer = model_for(pretrained_model_name)

    test_data_rows = {p: [] for p in test_data_paths}
    for model_path in model_paths:
        tf.keras.backend.clear_session()

        # load the fine-tuned transformer model
        model = models.from_transformer(transformer=transformer, n_outputs=1)
        model.load_weights(model_path).expect_partial()

        for data_path in test_data_paths:

            # tokenize the test data
            df = data.read_csv(data_path=data_path,
                               label_col=label_col,
                               n_rows=n_rows)
            x, y_ref = data.df_to_xy(df=df,
                                     tokenizer=tokenizer,
                                     label_col=label_col)

            # predict on the test data
            y_pred_scores = model.predict(x, batch_size=batch_size)
            y_pred = (y_pred_scores >= 0.5).astype(int).ravel()

            # evaluate predictions
            stats_arrays = sklearn.metrics.precision_recall_fscore_support(
                y_ref, y_pred, labels=[1])
            stats = [a.item() for a in stats_arrays]
            row = [model_path] + stats
            test_data_rows[data_path].append(row_fmt.format(*row))

            # if requested, print detailed results for this model
            if verbose:
                header = header_fmt.format(data_path, *headers)
                print("=" * len(header))
                print(header)
                print(row_fmt.format(*row))
                print("=" * len(header))
                df.insert(1, "prediction", y_pred_scores)
                print(df)
                print()

    # print results for all models on all datasets
    for data_path, rows in test_data_rows.items():
        print(header_fmt.format(data_path, *headers))
        for row in rows:
            print(row)
        print()
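Because the call passes labels=[1], precision_recall_fscore_support returns four one-element arrays (precision, recall, F1 and support for the positive class), which is why each is collapsed with .item() above. A small self-contained check of that behaviour:

import sklearn.metrics

y_ref = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]
arrays = sklearn.metrics.precision_recall_fscore_support(y_ref, y_pred, labels=[1])
print([a.item() for a in arrays])   # [1.0, 0.666..., 0.8, 3]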
Example #18
    def load_questions(self):
        records = data.read_csv("InterviewQuiz.csv", self.g.game_data)
        QuizSituationBase.questions = dict([(rec["Number"], rec) for rec in records])
        QuizSituationBase.questions_by_q = dict([(rec["Question"], rec) for rec in records])