Example #1
def play(inp, game, custom=False):
    if custom:
        value = read_data.get_data(game, custom=True)
    else:
        value = read_data.get_data(game)
    game_data = value['game_data']
    content_data = value['content_data']
    # print(game_data)
    x = game_data[inp]
    opt_val = [content_data[i] for i in x['options']]
    if 'none' in opt_val:
        print(inp, opt_val, x)
        return {'error': content_data[inp]}
    else:
        # print(x)
        # print('Chapter:{}\nFact:{}'.format(
        #     content_data[x['chapter']], x['fact']))
        # ops = x['options']
        # print(ops)
        # print(content_data[inp])
        # for i in ops:
        #     print('----------------\nOption:{}\nNext:{}\nMore:{}'.format(
        #         content_data[i], ops[i]['next'], ops[i]['more']))
        # ret_dct = {
        #     'chapter': x['chapter'],
        #     'fact': x['fact'],
        #     'options':x[options]
        # }
        x['question'] = inp
        return x
Example #2
import pandas as pd
from math import pi

from bokeh.plotting import figure, output_file, show

import read_data as rd  # project-local module (alias assumed from usage)


def plot(stock_name, first_date, last_date):
    tf = 'day'
    df = pd.DataFrame(rd.get_data(stock_name, tf, first_date, last_date))
    df["date"] = pd.to_datetime(df["date"])

    mids = (df.open + df.close) / 2
    spans = abs(df.close - df.open)

    inc = df.close > df.open
    dec = df.open > df.close
    nor = df.close == df.open  # was `=` (assignment), which overwrote df.close
    w = 12 * 60 * 60 * 1000  # half day in ms

    output_file("candlestick.html", title="candlestick.py example")

    TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

    p = figure(x_axis_type="datetime", tools=TOOLS, plot_height=480, plot_width=720, toolbar_location="right")

    p.segment(df.date, df.high, df.date, df.low, color="black")
    p.rect(df.date[inc], mids[inc], w, spans[inc], fill_color="#2ECC71", line_color="black")
    p.rect(df.date[dec], mids[dec], w, spans[dec], fill_color="#F2583E", line_color="black")
    p.rect(df.date[nor], mids[nor], w, spans[nor], fill_color="black", line_color="black")

    p.title = "Candlestick of " + str(stock_name)
    p.xaxis.major_label_orientation = pi / 4
    p.grid.grid_line_alpha = 0.3

    show(p)  # open a browser
Example #3
import numpy as np


def generate_gaussian_dataset(file, spectral):
    """
    Extend data set.

    Args:
       file: Directory of the file from which to load data_set (the data to be
           extended by adding Gaussian noise to one spectral band per sample)
           and label_set (the corresponding labels).
       spectral: A list whose index is the class and whose value is the
           spectral band to which noise is added for that class.

    Return:
        data: Extended data set.
        label: Extended label set.
    """
    data_set, label_set = rd.get_data(file)
    classes = np.max(label_set) + 1
    data_num = np.zeros(classes, dtype=int)  # np.zeros(classes, 1) raises TypeError
    for i in label_set:
        data_num[i] += 1
    data_mean, data_std = da.get_statistic_by_class(data_set, label_set)
    noise_by_class = []
    for eachclass in range(classes):
        spectral_idx = spectral[eachclass]
        noise = np.random.normal(data_mean[eachclass][spectral_idx],
                                 data_std[eachclass][spectral_idx],
                                 data_num[eachclass])  # was data_num[classes]: index out of range
        noise_by_class.append(noise)

    data = data_set
    label = label_set
    counters = np.zeros(classes, dtype=int)  # per-class cursor into noise_by_class
    for eachdata, eachlabel in zip(data_set, label_set):
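        # Hedged completion of the loop, which is truncated in the source:
        # add the class's pre-generated Gaussian noise at its designated
        # spectral band and append the noisy copy. The exact augmentation
        # scheme is an assumption inferred from the docstring.
        noisy = np.array(eachdata, copy=True)
        noisy[spectral[eachlabel]] += noise_by_class[eachlabel][counters[eachlabel]]
        counters[eachlabel] += 1
        data = np.vstack([data, noisy])
        label = np.append(label, eachlabel)
    return data, label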
Example #4
def cust():
    global F
    tp = read_data.get_data(CURR, custom=True)
    gd = tp['game_data']
    cd = tp['content_data']
    number = int(request.form['number'])
    data = play_game.play(number, CURR, custom=True)
    print(data)
    if number == 6:
        F = O
    if "options" not in data:
        F = O
        return data
    ops = data['options']
    tdct = {}
    counter = ['y','n']
    for i in ops:
        tdct[counter[0]] = {
            'val': cd[i],
            'next': ops[i]['next'],
            'more':cd[ops[i]['more']]
        }
        del counter[0]
    res = {
        'question':cd[data['question']],
        'fact':F,
        'chap':cd[data['chapter']],
        'options':tdct
    }
    F = cd[data['fact']]
    print(res)
    # ans = {"question":"Second"}
    return res
Example #5
def my_view_func(name):
    global CURR
    CURR = name 
    tp = read_data.get_data(name, custom=True)
    gd = tp['game_data']
    cd = tp['content_data']
    res = play_game.play(1, name, custom=True)
    ops = res['options']
    tdct = {}
    counter = ['y','n']
    for i in ops:
        tdct[counter[0]] = {
            'val': cd[i],
            'next': ops[i]['next'],
            'more':cd[ops[i]['more']]
        }
        del counter[0]
    global F, O
    F = O = cd[res['fact']]
    res = {
        'question':cd[res['question']],
        'fact':None,
        'chap':cd[res['chapter']],
        'options':tdct
    }
    print(res)
    # ans = {"question":"Second"}
    res['name'] = name
    return render_template('play_custom.html', val=res)
Example #6
def get_stock_name_of_template(first_date, last_date, tf, profit, highest_loss):
    stocks = get_stock_active_name_list()
    profit_return = None
    most_loss = None
    # stocks = ['AOT']
    ret_dict = dict()
    ret_dict['symbol'] = []
    ret_dict['profit_return'] = []
    ret_dict['most_loss'] = []
    ret_dict['first_date'] = []
    ret_dict['last_date'] = []
    for symbol in stocks:
        data = get_data(symbol, tf, first_date, last_date)
        profit_return = None  # reset each iteration so a failure below
        most_loss = None      # cannot reuse the previous symbol's values
        try:
            profit_return = utils.calculate_profit(data['close'])
            most_loss = utils.most_loss(data['close'])
        except TypeError:
            pass
        # print(profit_return, most_loss)
        if profit_return is not None and most_loss is not None:
            if profit_return > profit and most_loss < highest_loss:
                print(symbol, profit_return, most_loss)
                ret_dict['symbol'].append(symbol)
                ret_dict['profit_return'].append(profit_return)
                ret_dict['most_loss'].append(most_loss)
                ret_dict['first_date'].append(first_date)
                ret_dict['last_date'].append(last_date)
    return ret_dict
Example #7
    def get_data(self):
        with tf.name_scope('data'):
            train_data, test_data = read_data.get_data(self.batch_size)
            iterator = tf.data.Iterator.from_structure(train_data.output_types, train_data.output_shapes)
            self.img, self.label = iterator.get_next()
            #print(self.img)

            self.train_init = iterator.make_initializer(train_data)
            self.test_init = iterator.make_initializer(test_data)
Example #8
def get_results_protein():
    train, test = read_data.get_data("Grupa5_data/protein.RData")
    x_train = pd.DataFrame(train.iloc[:, 0:2000])
    x_test = pd.DataFrame(test)

    x_train = read_data.normalize_data(x_train)
    x_test = read_data.normalize_data(x_test)
    y_train = train.iloc[:, 2000]

    # param = {'alpha': 1e-2}
    ridge = Ridge(alpha=0.01)
    cross_validation.cross_validate(x_train, y_train, ridge)
Example #9
    def get_data(self):
        with tf.name_scope('data'):
            train_data, test_data = read_data.get_data(self.batch_size,
                                                       self.n_train,
                                                       self.n_test)
            iterator = tf.data.Iterator.from_structure(
                train_data.output_types, train_data.output_shapes)
            self.img, self.labels = iterator.get_next()  # this img has 24 channels!

            self.train_init = iterator.make_initializer(train_data)
            self.test_init = iterator.make_initializer(test_data)
Example #10
def get_skyline_count(acc_metric, size_metric, include_stanford=True):
    languages, taggers, acc = read_data.get_data(
        acc_metric, include_stanford=include_stanford)
    size = read_data.get_data(size_metric,
                              include_stanford=include_stanford)[2]

    taggers_by_language = np.repeat(np.array(taggers), len(languages)).reshape(
        (len(taggers), len(languages))).T
    acc_by_language = np.array(acc).T
    size_by_language = np.array(size).T

    skyline_count = {x: 0 for x in taggers}
    for taggers, accs, sizes in zip(taggers_by_language, acc_by_language,
                                    size_by_language):
        zipped = list(zip(taggers, accs, sizes))
        zipped.sort(key=lambda x: x[2])
        models_on_skyline = get_models_on_skyline(zipped)
        for model in models_on_skyline:
            skyline_count[model] += 1

    return taggers, list(skyline_count.values())
Example #11
def calculate_lyapunov(unperturbed_file, perturbed_file):  # TODO: remove references to N_calc
    """This function calculates the Lyapunov exponent at each integration step between two solutions.
    PARAMETERS:
        unperturbed_file: (string) Name of the file in which the unperturbed solution is stored
        perturbed_file: (string) Name of the file in which the perturbed solution is stored
    RETURNS:
        lyaps: (float[N_steps]) N_steps-dimensional array containing the Lyapunov exponent calculated between the two solutions at each integration step."""
    data_u, mu_u, k_u = read_data.get_data(unperturbed_file, form='lyapunov')
    data_p, mu_p, k_p = read_data.get_data(perturbed_file, form='lyapunov')

    length = len(data_u)
    try:
        assert length == len(data_p)
        assert mu_u == mu_p
        assert k_u == k_p
    except AssertionError:
        print(colors.red | "The integration results are not written as expected", file=sys.stderr)
        sys.exit(6)  # integration results are not written as expected

    # Actual calculation of the Lyapunov exponents.
    # Difference between the two simulations:
    difference = data_u - data_p
    # Norms of the difference vectors:
    norms = np.linalg.norm(difference, axis=1)
    # Norm of the initial difference:
    norm_0 = norms[0]
    # ln of the ratio between the evolved and initial separations:
    log_diff_ratio = np.log(norms / norm_0)
    lyaps = np.empty(length)
    lyaps[0] = 0.0  # np.empty would otherwise leave index 0 uninitialized
    cumulative_sum = 0

    print("Lyapunov exponents calculation:")
    for i in Progress.range(1, length):
        # mean of the log ratios from the initial state up to state i
        cumulative_sum += log_diff_ratio[i]
        lyaps[i] = cumulative_sum / i

    return lyaps
Example #12
def main():
    warnings.filterwarnings("ignore", category=FutureWarning)

    #read data
    tweets, labels, tests, test_labels = read_data.get_data(0.8)

    all_data = tweets + tests
    all_labels = labels + test_labels
    #k_fold_Cross_validation.validate(5, all_data, all_labels)

    trainDF = pandas.DataFrame()
    testDF = pandas.DataFrame()
    #
    # #remove noise
    # #pandas dataframe is a 2D array which can have several column_names and supports mathematical operations on rows and columns
    trainDF['text'] = Data_cleaner.remove_noise(tweets)
    trainDF['labels'] = labels
    testDF['tests'] = Data_cleaner.remove_noise(tests)
    testDF['test_labels'] = test_labels
    #print(trainDF)
    # # extract features from text and test
    train_features, test_features = extract_features.get_features_TF_IDF(
        trainDF['text'], testDF['tests'])
    #train_features,test_features = extract_features.word2vec(trainDF['text'],testDF['tests'])
    # # multi layer perceptron
    # clf = MLPClassifier()
    # parameter_space = {
    # 'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    # 'activation': ['tanh', 'relu', 'logistic'],
    # 'solver': ['sgd', 'adam', 'lbfgs'],
    # 'alpha': [float(x) for x in np.linspace(0.0001, 5, num = 100)],
    # 'learning_rate': ['constant','adaptive']}
    #
    # grid = GridSearchCV(clf, parameter_space, n_jobs=-1, cv=3,verbose=1)
    # grid.fit(train_features, trainDF['labels'])
    # print(grid.best_params_)

    #clf.fit(train_features,trainDF['labels'])
    #p = clf.predict(test_features)

    #print(np.mean(p == testDF['test_labels']))

    # run our first classifier naive naive_bayes
    #naive_bayes.run_naive_bayes(train_features, test_features,trainDF['labels'], testDF['test_labels'])
    #
    #
    # # logistic regression
    #logistic_regression.tune(train_features,trainDF['labels'])
    logistic_regression.run(train_features, test_features, trainDF['labels'],
                            testDF['test_labels'])
Example #13
def get_train_feature():
    stocks_all = get_stock_active_name_list()
    stocks_good = get_stock_name_of_growth_more_than_percent_with_period(Decimal(15.0), 90)
    print(stocks_good)
    prev_day = (datetime.date.today() - datetime.timedelta(days=90)).strftime('%m/%d/%Y')

    y = []
    feature_rsi_7 = []
    feature_rsi_14 = []
    feature_ema_10 = []
    feature_ema_25 = []
    feature_ema_50 = []
    feature_ema_75 = []

    feature_macd_vs_signal = []
    # print(stocks_good)
    for symbol in stocks_all:
        # print(symbol + ":  " + str(get_data(symbol, 'day', '12/19/2016', '12/19/2016')))
        data = get_data(symbol, 'day', 0, prev_day)
        if data is None:
            pass
            # print('none' + symbol)
        else:
            feature_rsi_7.append([get_rsi_7(data['close'])])
            feature_ema_10.append([data['close'][-1]/float(get_ema(data['close'], 10))])
            feature_ema_25.append([data['close'][-1]/float(get_ema(data['close'], 25))])
            feature_ema_50.append([data['close'][-1]/float(get_ema(data['close'], 50))])
            # feature_ema_75.append([data['close'][-1]/float(get_ema(data['close'], 75))])
            # feature_rsi_14.append([get_rsi_14(data['close'])])
            feature_macd_vs_signal.append([macd_vs_signal(data['close'])])
            if symbol in stocks_good:
                # print('good' + symbol)
                y.append([1])
            else:
                y.append([0])

    features = numpy.hstack([
        numpy.array(feature_rsi_7),
        numpy.array(feature_ema_25),
        numpy.array(feature_ema_10),
        numpy.array(feature_ema_50),
        # numpy.array(feature_ema_75),
        # numpy.array(feature_rsi_14),
        numpy.array(feature_macd_vs_signal)
    ])
    output = numpy.hstack([numpy.array(y)])

    return features, output
Example #14
import random
import time

import torch
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler

import read_data  # project-local module


def train_model(model, train_path, test_path, num_epochs=5000):
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=8, gamma=0.1)
    for epoch in range(num_epochs):
        running_loss = 0.0
        model.train(True)
        exp_lr_scheduler.step()
        lines = list(open(train_path, 'r'))
        steps = len(lines) / batch_size
        random.shuffle(lines)
        for i in range(int(steps)):
            # get the inputs
            inputs, labels = read_data.get_data(lines, batch_size, i)
            # wrap them in Variable
            inputs, labels = Variable(torch.Tensor(inputs)).cuda(), Variable(
                torch.Tensor(labels)).cuda()
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            start_time = time.time()
            outputs = model(inputs)
            end_time = time.time()
            _, predicted = torch.max(outputs, 1)
            labels = labels.to(device=torch.device("cuda:0"),
                               dtype=torch.int64)
            correct = (predicted == labels).sum()
            acc_train = float(correct) / batch_size

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # print statistics
            print('[%d, %5d] loss: %.3f  time:  %.3f' %
                  (epoch + 1, i + 1, loss.item(), end_time - start_time))
            running_loss += loss.item()  # .item() avoids keeping the autograd graph alive

            if i % 10 == 0:
                acc = test(model, test_path)
                print('test [%d, %5d] loss: %.3f acc:  %.3f' %
                      (epoch + 1, i + 1, running_loss / 10, acc))
                running_loss = 0.0
                torch.save(
                    model.state_dict(), 'model/model_' + str(epoch) + '_' +
                    str(i) + '_' + str(acc) + '.pkl')

    print('Finished Training')
Example #15
def get_results_cancer():
    train, test = read_data.get_data("Grupa5_data/cancer.RData")
    x_train = pd.DataFrame(train.iloc[0:17737, :])
    y_train = train.iloc[17737, :]
    x_test = pd.DataFrame(test)

    x_train = x_train.T
    y_train = y_train.T
    x_test = x_test.T

    print(x_train)
    print(x_test)

    x_train = read_data.normalize_data(x_train)
    x_test = read_data.normalize_data(x_test)

    ridge = Ridge()
    cross_validation.cross_validate(x_train, y_train, ridge)
Example #16
def test_steady_state(mu, k, sign):
    """This function tests whether the steady states are maintained"""
    if sign:
        sign = +1
    else:
        sign = -1
    x1_0 = sign * k
    x2_0 = sign / k
    y1_0 = mu * k**2
    y2_0 = mu * k**-2
    # y2_0 added to the initial state (an assumed fix: the 4-component state is asserted below)
    dynamo_test = integrator.dynamo(mu, k, 30, [x1_0, x2_0, y1_0, y2_0], "test_output.csv", 2**-8)
    dynamo_test.evolve(0)
    dynamo_test.write_results()
    t, x1, x2, y1, y2, mu, k = read_data.get_data("test_output.csv", '1')
    assert (abs(x1[-1]-x1_0)<2*10**-6)
    assert (abs(x2[-1]-x2_0)<2*10**-6)
    assert (abs(y1[-1]-y1_0)<2*10**-6)
    assert (abs(y2[-1]-y2_0)<2*10**-6)
    os.remove("test_output.csv")
Example #17
def sim_time(dataname):
    # unpack a single get_data call instead of re-reading the file six times;
    # `dict` renamed to data_dict to avoid shadowing the builtin
    time, energy, obs_ID, t, E, data_dict = data.get_data(dataname)[:6]

    t = np.array(t)

    sim_t = t - t
    for i in range(len(t)):
        sim_t[i] = np.random.random(len(t[i])) * (t[i][-1] - t[i][0]) + t[i][0]
    return sim_t
Example #18
def get_time_interval_data(meal, interval=120):

    user = meal[0]
    records, cgm_data = get_data(user)
    start = records['Start'].iloc[meal[1]]

    def get_minutes(tdelta):
        days = tdelta.days
        seconds = tdelta.seconds
        return days * 1440 + seconds / 60.

    records['Start'] = (records['Start'] - start).apply(get_minutes)
    records['Finish'] = (records['Finish'] - start).apply(get_minutes)
    cgm_data['Time'] = (cgm_data['Time'] - start).apply(get_minutes)

    records = records[(records['Start'] >= 0) & (records['Start'] <= interval)]

    cgm_data = cgm_data[(cgm_data['Time'] >= 0)
                        & (cgm_data['Time'] <= interval)]

    return records, cgm_data
Example #19
def grid_search():
    train, test = read_data.get_data("Grupa5_data/protein.RData")
    x_train = pd.DataFrame(train.iloc[:, 0:2000])
    x_test = pd.DataFrame(test)

    x_train = read_data.normalize_data(x_train)
    x_test = read_data.normalize_data(x_test)
    y_train = train.iloc[:, 2000]

    ridge = Ridge()
    param = {'alpha': [1e-4, 1e-2, 1, 5, 10]}
    ridge_regr = GridSearchCV(ridge,
                              param,
                              scoring="neg_mean_squared_error",
                              cv=10)
    ridge_regr.fit(x_train, y_train)

    print(ridge_regr.best_params_)
    print(ridge_regr.best_estimator_)
    print(ridge_regr.cv_results_)
    print(ridge_regr.best_index_)
Example #20
def get_test_feature():
    feature_rsi_7 = []
    feature_rsi_14 = []
    feature_ema_10 = []
    feature_ema_25 = []
    feature_ema_50 = []
    feature_ema_75 = []
    feature_macd_vs_signal = []
    stocks_all = get_stock_active_name_list()
    for symbol in stocks_all:
        # print(symbol + ":  " + str(get_data(symbol, 'day', '12/19/2016', '12/19/2016')))
        data = get_data(symbol, 'day', 0, '12/19/2016')
        # print(data)
        if data is None:
            pass
            # print('none' + symbol)
        else:  # was the redundant `elif data is not None:`
            feature_rsi_7.append([get_rsi_7(data['close'])])
            # feature_rsi_14.append([get_rsi_14(data['close'])])
            feature_ema_10.append([data['close'][-1]/float(get_ema(data['close'], 10))])
            feature_ema_25.append([data['close'][-1]/float(get_ema(data['close'], 25))])
            feature_ema_50.append([data['close'][-1]/float(get_ema(data['close'], 50))])
            # feature_ema_75.append([data['close'][-1]/float(get_ema(data['close'], 75))])
            feature_macd_vs_signal.append([macd_vs_signal(data['close'])])

    features = numpy.hstack([
        numpy.array(feature_rsi_7),
        numpy.array(feature_ema_25),
        numpy.array(feature_ema_10),
        numpy.array(feature_ema_50),
        # numpy.array(feature_ema_75),
        # numpy.array(feature_rsi_14),
        numpy.array(feature_macd_vs_signal)
    ])

    return numpy.asarray(features)
Example #21
def act():
    global CURR
    CURR = None
    global FACT
    tp = read_data.get_data('ww1_f')
    gd = tp['game_data']
    cd = tp['content_data']
    number = int(request.form['number'])
    data = play_game.play(number, 'ww1_f')
    # print(data)
    if number == 6:
        FACT = OG
    if "options" not in data:
        FACT = OG
        print(data)
        return data
    ops = data['options']
    tdct = {}
    counter = ['y','n']
    for i in ops:
        tdct[counter[0]] = {
            'val': cd[i],
            'next': ops[i]['next'],
            'more':cd[ops[i]['more']]
        }
        del counter[0]
    res = {
        'question':cd[data['question']],
        'fact':FACT,
        'chap':cd[data['chapter']],
        'options':tdct
    }
    FACT = cd[data['fact']]
    # print(res)
    # ans = {"question":"Second"}
    return res
Example #22
def run_sig_processing(data_src, labels_src, band_type):
    # parameters initialization
    start_time = 3
    time_slides = 0.2
    window_length = 2
    segments_num = 11

    data, labels, sfreq = get_data(data_src, labels_src)

    # execute
    preprocessed_data = dict()
    for subject in data:
        if subject not in preprocessed_data:
            preprocessed_data[subject] = dict()
        for session in data[subject]:
            df_trials_data = pd.DataFrame()
            for channel in data[subject][session]:
                session_data = data[subject][session][channel]
                trials_processed_data = list()
                for trial_data in session_data:
                    processed_data = preprocess_signal(trial_data, start_time,
                                                       time_slides,
                                                       window_length,
                                                       segments_num, sfreq)
                    trials_processed_data.append(processed_data)
                df_trials_data[channel] = trials_processed_data
            # print(df_trials_data)
            # pause = input("pause: ")
            preprocessed_data[subject][session] = df_trials_data

    if band_type == 0 or band_type == 1:
        combined_data, combined_labels = combine_processed_data(
            preprocessed_data, labels)
        mu_band = feature_band_selection(combined_data,
                                         combined_labels,
                                         sfreq,
                                         step=1,
                                         band_range=(4, 14),
                                         band_size=(4, 5, 6),
                                         features_type=band_type)
        beta_band = feature_band_selection(combined_data,
                                           combined_labels,
                                           sfreq,
                                           step=1,
                                           band_range=(16, 40),
                                           band_size=(4, 5, 6),
                                           features_type=band_type)
    else:
        mu_band = dict()
        beta_band = dict()
        for subject in preprocessed_data:
            mu_band[subject] = (4, 14)
            beta_band[subject] = (16, 40)

    # get input data of CNN, add to column of dataFrame form processed_data[subject][session]
    for subject in preprocessed_data:
        for session in preprocessed_data[subject]:
            preprocessed_data[subject][session]['input data'] \
                = preprocessed_data[subject][session].apply(get_input_data, axis=1,
                                                            mu_band=mu_band[subject], beta_band=beta_band[subject])

    return preprocessed_data, labels
Example #23
from math import sqrt
from read_data import get_data
from metric_functions import get_kernel, get_distance
from f_score import get_f_score
from draughtsman import draw

filename = 'dataset_191_wine.csv'

X, Y = get_data(filename)

h_max = int(sqrt(len(X)))  # abs() was redundant: len() is never negative

DISTANCE_NAMES = ['manhattan', 'euclidean', 'chebyshev']
KERNEL_NAMES = ['uniform', 'triangular', 'epanechnikov', 'quartic']
WINDOW_TYPES = ['fixed', 'variable']

max_f_score = -1
win_kernel = ""
win_distance = ""
win_window = ""

for window_type in WINDOW_TYPES:
    print('Window type: ' + window_type)
    is_fixed = window_type == 'fixed'

    for distance_name in DISTANCE_NAMES:
        print('    ' * 1 + 'Distance function: ' + distance_name)
        distance = get_distance(distance_name)

        for kernel_name in KERNEL_NAMES:
            print('    ' * 2 + 'Kernel function: ' + kernel_name)
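            kernel = get_kernel(kernel_name)

            # Hedged completion of the snippet, which is truncated in the
            # source: sweep the window parameter and keep the configuration
            # with the best F-score. The get_f_score signature is assumed,
            # not confirmed by the source.
            for h in range(1, h_max + 1):
                f_score = get_f_score(X, Y, distance, kernel, is_fixed, h)
                if f_score > max_f_score:
                    max_f_score = f_score
                    win_kernel = kernel_name
                    win_distance = distance_name
                    win_window = window_type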
Example #24
# note: argparse's type=bool treats any non-empty string (even "False") as True
parser.add_argument('--train', default=True, type=lambda s: s.lower() in ('true', '1'), help='train the model')

opt = parser.parse_args()

# ensure `device` is always defined (originally it was only set when use_cuda was true)
device = torch.device("cuda" if opt.use_cuda and torch.cuda.is_available() else "cpu")

base_dir = os.path.join('..', 'input', 'skin-cancer-mnist-ham10000')
all_image_path = glob(os.path.join(base_dir, '*', '*.jpg'))
imageid_path_dict = {
    os.path.splitext(os.path.basename(x))[0]: x
    for x in all_image_path
}

df_train, df_val = get_data(base_dir, imageid_path_dict)

normMean, normStd = compute_img_mean_std(all_image_path)

model = models.resnext101_32x8d(pretrained=True)
model.fc = nn.Linear(in_features=2048, out_features=7)

model.to(device)

input_size = 224
train_transform = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1),
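    # Hedged completion of the truncated Compose: converting to a tensor and
    # normalizing with the statistics computed above is an assumption.
    transforms.ToTensor(),
    transforms.Normalize(normMean, normStd),
])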
Example #25
import numpy as np
import sys
sys.path.append("../network/")
sys.path.append("../")

from read_data import get_data
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_validate, train_test_split, KFold
from metrics import gain_chart, prob_acc
from sklearn.tree import DecisionTreeClassifier
from resampling import Resample

X, Y = get_data()
Y = Y.flatten()
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.5)
#r = Resample(X_train, Y_train)
#X_train, Y_train = r.Over()

clf_rf = RandomForestClassifier(n_estimators=100,
                                max_depth=8,
                                min_samples_split=100)

clf_rf.fit(X_train, Y_train)

ypred_test = clf_rf.predict_proba(X_test)

gain_chart(Y_test, ypred_test)
prob_acc(Y_test, ypred_test)

clf_dt = DecisionTreeClassifier(max_depth=6, min_samples_split=200)
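
# Hedged completion of the snippet, which is truncated in the source,
# mirroring the random-forest evaluation above:
clf_dt.fit(X_train, Y_train)
ypred_test_dt = clf_dt.predict_proba(X_test)
gain_chart(Y_test, ypred_test_dt)
prob_acc(Y_test, ypred_test_dt)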
Example #26
import codecs
import read_data as rd
import process_data as pd

DIR = r"/home/chixiao/projects/ECEI/"

if __name__ == "__main__":
    data = rd.get_data(DIR, [0, 0.0001], 0.0001)
    print(data[:, :, -1])
    print(data[:, :, -2])
    pd.show_data(data)
Example #27
import numpy as np
from read_data import get_data
import sys
sys.path.append("network/")
from NN import NeuralNet

X, Y = get_data(normalized=False, standardized=True, file='droppedX6-X11.csv')
nn = NeuralNet(X,
               Y.flatten(),
               nodes=[18, 50, 50, 2],
               activations=['tanh', 'tanh', None],
               cost_func='log',
               regularization='l2',
               lamb=0.001)

nn.split_data(frac=0.5, shuffle=True, resample=True)
nn.TrainNN(epochs=1000, batchSize=200, eta0=0.01, n_print=10)
Example #28
import os
import random

import numpy as np
import tensorflow as tf

from read_data import get_data, balance_data
from sklearn.metrics import accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
LOG_DIR = './bitcorn_model2'
TIME_STEP = 6  # sliding-window size
BATCH_SIZE = 4096  # number of samples fed into training per step

CHANNEL = 5  # number of input feature dimensions
LEARNING_RATE = 0.003  # learning rate
EPOCH = 20  # number of training epochs
file_name = 'USDT_BTC 5min(2015-2018).csv'  # training file name
data_x, data_y = get_data(file_name, TIME_STEP)
data_y = np.array(data_y).astype(int)  # np.int is deprecated
data_x = np.array(data_x)
# split into training, test, and validation sets
len_train = int(len(data_y) * 0.7)
len_train_test = int(len(data_y) * 0.85)
train_x = data_x[:len_train]
train_y = data_y[:len_train]
test_x = data_x[len_train:len_train_test]  # was data_x[len_train:], which overlapped the validation set
test_y = data_y[len_train:len_train_test]
val_x = data_x[len_train_test:]
val_y = data_y[len_train_test:]
print(train_x.shape)
graph = tf.Graph()
with graph.as_default():
    input_x = tf.placeholder(tf.float32, [None, TIME_STEP, CHANNEL])
Example #29
#!/usr/bin/python3

import sys
sys.path.append("..")
from sklearn.tree import DecisionTreeClassifier
from custom_classes.ribes_RFFSampler import ribes_RFFSampler
from sklearn.kernel_approximation import RBFSampler

from read_data import get_data

train_data, train_predictions, test_data, test_predictions = get_data()
train_predictions = train_predictions.ravel()
test_predictions = test_predictions.ravel()

desired_components = 16

sampler = RBFSampler(n_components=desired_components)
sampler.fit(train_data)
mapped_train_data = sampler.transform(train_data)
mapped_test_data = sampler.transform(test_data)
arbol = DecisionTreeClassifier()
arbol.fit(mapped_train_data, train_predictions)
test_score = arbol.score(mapped_test_data, test_predictions)
print(test_score)

# print(train_data[1:10, :])
# print(mapped_train_data[1:10,:])

# sampler = RBFSampler(n_components = desired_components)
# sampler.fit(train_data)
Example #30
import matplotlib.pyplot as plt

from normalize import normalize
from read_data import get_data
from train import calc_real_wei

try:
    with open('model/weights') as f:
        a, b = list(map(float, f.read().split('\n')))
except (OSError, ValueError):  # was a bare except, which also swallows KeyboardInterrupt
    print("Train first")
    exit()

x, y = normalize(*get_data())
real_a, real_b = calc_real_wei(x, y)

pred = [a * el + b for el in x]
real_pred = [real_a * el + real_b for el in x]

fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(x, pred, 'r', label='Prediction')
ax.plot(x, real_pred, 'b', label='Real line')
ax.scatter(x, y, label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Mileage')
ax.set_ylabel('Price')
ax.set_title('Predicted price vs. mileage')
plt.show()
Example #31
def main():
    cross_path = 'data/timit/timit/cross/'
    test_path = 'data/timit/timit/test/'
    train_path = 'data/timit/timit/train/'
    #pre word
    data = get_data(cross_path)
    feature = data['features']
    phoneme = data['phonemes']
    word = data['words']

    testdata = get_data(test_path)  # was get_data(cross_path); test_path was otherwise unused
    testfeature = testdata['features']  # was data['features'], which ignored testdata
    testphone = testdata['phonemes']
    testword = testdata['words']

    pdict, wdict = getDictionary(phoneme, word)
    phoneme = proRawData(phoneme, pdict)
    word = proRawData(word, wdict)

    testphone = proRawData(testphone, pdict)
    testword = proRawData(testword, wdict)

    #feature->phoneme
    np.random.seed(0)
    memcell = 100
    dimx = 12
    dimy = 61

    lstmweight = LstmWeight(memcell, dimx)
    softmaxweight = Weight(memcell, dimy)
    lstmnetwork = LSTMLayer(lstmweight)
    softmaxnetwork = SoftMaxLayer(softmaxweight)
    ####################Training#######################
    epoch = 0
    maxloss = 0
    for count in range(200):
        loss = 0
        for item in range(len(feature)):
            for content in feature[item]:
                lstmnetwork.xlistAdd(content)
            output = lstmnetwork.getHmatrix()
            softmaxnetwork.outputAdd(output.T)
            ymatrix = softmaxnetwork.getYmatrix()
            ctclayer = CTC(ymatrix.T, phoneme[item])
            do, tmp = ctclayer.returndY()
            loss += tmp
            softmaxnetwork.ylist(do)
            hmatrix = softmaxnetwork.getdHmatrix()
            lstmnetwork.ylist(hmatrix.T)
            lstmnetwork.xlistRefresh()
            softmaxnetwork.outputRefresh()
            softmaxweight.changeWeight(0.001)
            lstmweight.changeWeight(0.001)
        print(loss)
        if loss > maxloss:
            maxloss = loss
    ####################Testing#######################
    for item in range(len(testfeature)):
        for content in testfeature[item]:
            lstmnetwork.xlistAdd(content)
        print('label',testphone[item])
        output = lstmnetwork.getHmatrix()
        lstmnetwork.xlistRefresh()
        print('predict',softmaxnetwork.predict(output.T))