Example #1
def model_prediction(symbol):
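    # Train c.NUM_TRAIN models on (optionally shuffled) data and average their
    # predictions to smooth out run-to-run variance.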
    prediction = []
    scaler = None
    for i in range(0, c.NUM_TRAIN):
        price_data, covid_data = prepare_data(c.SAVED_CSV_PATH.format(symbol),
                                              shuffle=c.SAMPLE_SHUFFLE)
        scaler = covid_data["scaler"]
        print(f"Executing for the {i}th time")
        model = build_overall_model(price_data, covid_data, batch_size=10)
        fit_overall_model(model, price_data, covid_data)

        price_data, covid_data = prepare_data(c.SAVED_CSV_PATH.format(symbol),
                                              shuffle=False)

        print(f"Making {i}th Prediction")

        if i == 0:
            prediction = predict_overall_model(model, price_data, covid_data)
        else:
            prediction += predict_overall_model(model, price_data, covid_data)
    prediction = np.array(prediction / c.NUM_TRAIN)
    label = np.array(covid_data["y_test"][:, 2]).reshape(-1, 1)
    pyplot.plot(
        scaler.inverse_transform(prediction[:, 0].reshape(-1, 1)),
        label="prediction_first",
    )
    pyplot.plot(
        scaler.inverse_transform(prediction[:, c.PREDICTION_STEP - 1].reshape(
            -1, 1)),
        label="prediction_last",
    )
    pyplot.plot(scaler.inverse_transform(label), label="actual")
    pyplot.legend()
    pyplot.show()
Example #2
def mlformat(dir, segment_size):
    # read in audio files, take the fft, and return the normalized fft plus
    # the class

    mlsamples = []
    mlclasses = []

    flst = sorted(os.listdir(dir))      # list the available audio files

    for angle in ['%02d' % a for a in range(91)]:   # angles '00' through '90'
        for filename in flst:
            if fnmatch.fnmatch(filename, '%sdeg*' % angle):
                samplerate, samples = read(os.path.join(dir, filename))
                sample, label = preprocessing.prepare_data(samples, samplerate, angle, segment_size)    # take the fft
                mlsamples.extend(sample[0:1])
                mlclasses.extend(label[0:1])         # the class of each fft

    return mlsamples, mlclasses
def train():
    x_train, x_test, y_train, y_test = prepare_data()
    lasso = linear_model.Lasso()
    lasso.fit(x_train, y_train)
    y_pred = lasso.predict(x_test)

    return mean_squared_error(y_test, y_pred) ** 0.5
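The train() example above depends on a project-specific prepare_data(); as a point of reference, a minimal self-contained sketch of the same Lasso-plus-RMSE pattern on synthetic data could look like this (everything below is illustrative and not part of the original project):

import numpy as np
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Synthetic stand-in for prepare_data(): 200 samples, 5 features.
rng = np.random.default_rng(0)
X = rng.random((200, 5))
y = X @ np.array([1.0, -2.0, 0.5, 0.0, 3.0]) + 0.1 * rng.standard_normal(200)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

lasso = linear_model.Lasso()
lasso.fit(x_train, y_train)
rmse = mean_squared_error(y_test, lasso.predict(x_test)) ** 0.5
print(f"RMSE: {rmse:.4f}")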
Example #4
def input_up(sess):
    data = preprocessing.prepare_data(dataset='Test')
    print(len(data))
    sub_input_sequence = []
    sub_label_sequence = []
    padding = abs(config.image_size - config.label_size) // 2  # 6
    input_,label_ = preprocessing.preprocess(data[0],config.scale)
    if len(input_.shape) == 3:
        h, w, _ = input_.shape
    else:
        h, w = input_.shape
    nx = 0  # number of patches along the height (needed later to merge the sub-images)
    ny = 0  # number of patches along the width (needed later to merge the sub-images)
    # the sub-images will need to be merged back together afterwards
    for x in range(0, h - config.image_size + 1, config.stride):  # x from 0 to h - image_size + 1, step = stride (21)
        nx += 1
        ny = 0
        for y in range(0, w - config.image_size + 1, config.stride):  # y from 0 to w - image_size + 1, step = stride (21)
            ny += 1
            # crop patches: sub_input = input_[x:x+33, y:y+33], sub_label = label_[x+6:x+6+21, y+6:y+6+21]
            sub_input = input_[x:x + config.image_size, y:y + config.image_size]  # [33 x 33]
            sub_label = label_[x + padding:x + padding + config.label_size,
                        y + padding:y + padding + config.label_size]  # [21 x 21]
            sub_input = sub_input.reshape([config.image_size, config.image_size, 1])
            sub_label = sub_label.reshape([config.label_size, config.label_size, 1])
            sub_input_sequence.append(sub_input)
            sub_label_sequence.append(sub_label)
    # everything above is the same as in the training pipeline
    arrdata = np.asarray(sub_input_sequence)  # [?, 33, 33, 1]
    arrlabel = np.asarray(sub_label_sequence)  # [?, 21, 21, 1]
    make_data(arrdata, arrlabel)  # save in HDF5 format
    return nx,ny
Example #5
def load_external_data(link: str) -> Tuple[pd.DataFrame, List[str], Exception]:
    """ Load data from a link and preprocess it

    Parameters:
    -----------

    link : str
        Link to the data (should be hosted online)

    Returns:
    --------

    df : pandas.core.frame.DataFrame | False
        The data loaded and preprocessed.
        If there is an issue loading/preprocessing then it
        returns False instead.

    player_list : list | False
        List of players that have been in any board game match.
        If there is an issue with loading/preprocessing the data
        then it returns False instead.

    exception : False | Exception
        If there is something wrong with preprocessing,
        return Exception, otherwise return False
    """

    exception = False
    try:
        df, player_list = preprocessing.prepare_data(link)
        return df, player_list, exception
    except Exception as exception:
        return False, False, exception
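A hedged usage sketch for load_external_data(); the URL and the handling of the returned values below are illustrative assumptions based only on the docstring above:

df, player_list, exception = load_external_data("https://example.com/matches.csv")  # hypothetical URL
if exception:
    print(f"Loading failed: {exception}")
else:
    print(f"Loaded {len(df)} rows covering {len(player_list)} players")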
Example #6
def main():

    torch.manual_seed(777)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    parser = argparse.ArgumentParser()

    parser.add_argument("--path", type=str)
    parser.add_argument("--embedding_dim", type=int, default=300)
    parser.add_argument("--iterator", type=int, default=10)
    parser.add_argument("--lr", type=float, default=1e-5)
    parser.add_argument("--decay", type=float, default=0.01)
    parser.add_argument("--batch_size", type=int, default=100)

    args = parser.parse_args()

    trg, src = load_pair(args.path)

    src_token = eng_tokenize(src)
    trg_token = es_tokenize(trg)
    trg2idx, idx2trg = make_dictionary(trg_token)
    src2idx, idx2src = make_dictionary(src_token)
    src_ix = make_src_idx(src_token, src2idx)
    trg_ix = make_trg_idx(trg_token, trg2idx)

    # model definition
    encoder = EncoderGRU(emb_dim=args.embedding_dim,
                         bidirectional=True,
                         vocab_size=len(src2idx))
    attention = Attention(emb_dim=args.embedding_dim, padding_idx=0)

    decoder = DecoderGRU(emb_dim=args.embedding_dim,
                         attention=attention,
                         n_class=len(trg2idx))
    model = Seq2Seq_a(encoder, decoder, device, trg2idx)

    num_parameter(model)

    # loss and optimizer setup
    loss_func = nn.CrossEntropyLoss(ignore_index=0)
    optimizer = optim.RMSprop(model.parameters(),
                              lr=args.lr,
                              weight_decay=args.decay)

    # split the data into train/test loaders

    train_loader, test_loader = prepare_data(src=src_ix,
                                             trg=trg_ix,
                                             test_size=0.2,
                                             batch_size=args.batch_size)
    train(model,
          iterator=args.iterator,
          optimizer=optimizer,
          criterion=loss_func,
          train_loader=train_loader,
          visual_path="ssibal",
          trg2idx=trg2idx,
          savepath="./seq2seq_model.pth")
Example #7
def main():
    # items_to_predict = item_selection()
    # items_to_predict = select_sorted_items(items_to_predict)
    items_to_predict = [
        'Amulet_of_strength', "Green_d'hide_vamb", 'Staff_of_fire', 'Zamorak_monk_top',
        'Staff_of_air', 'Adamantite_bar', 'Zamorak_monk_bottom', 'Adamant_platebody',
        'Runite_ore', 'Rune_scimitar', 'Rune_pickaxe', 'Rune_full_helm', 'Rune_kiteshield',
        'Rune_2h_sword', 'Rune_platelegs', 'Rune_platebody', 'Old_school_bond'
    ]
    num_features = 2

    for item_to_predict in items_to_predict:
        # =========== PREPROCESSING ===========
        # SELECT ITEMS
        items_selected = item_selection()

        # FEATURE EXTRACTION
        preprocessed_df = prepare_data(item_to_predict, items_selected)

        # FEATURE SELECTION & NORMALIZATION
        selected_df, pred_std, pred_mean = regression_f_test(
            preprocessed_df, item_to_predict, number_of_features=num_features)
        # print(selected_df.head())
        # print(selected_df.shape)
        # print("columns with nan: {}".format(selected_df.columns[selected_df.isna().any()].tolist()))

        # =========== UNIVARIATE ===========
        uni_config = {}
        # TRAINING AND SAVING MODEL
        univariate_rnn(selected_df, item_to_predict)

        # # LOADING AND APPLYING MODEL
        # loaded_model = tf.keras.models.load_model('models/{}_uni_model.h5'.format(item_to_predict))
        # apply_univariate_test(selected_df, item_to_predict, loaded_model, pred_std, pred_mean)

        # =========== MULTIVARIATE SINGLE STEP ===========
        multiS_config = {
            'lstm_units': 64,
            'EVALUATION_INTERVAL': 300,
            'EPOCHS': 10,
            'learning_rate': 0.0001,
            'num_dropout': 2
        }
        # TRAINING AND SAVING MODEL
        multivariate_rnn_single(selected_df, item_to_predict, **multiS_config)

        # # LOADING AND APPLYING MODEL
        # loaded_model = tf.keras.models.load_model('models/{}_multiS_model.h5'.format(item_to_predict))
        # apply_multivariate_single_step_test(selected_df, item_to_predict, loaded_model, pred_std, pred_mean)

        # =========== MULTIVARIATE MULTI STEP ===========
        multiM_config = {
            'lstm_units': 128,
            'EVALUATION_INTERVAL': 400,
            'EPOCHS': 15,
            'learning_rate': 0.0001,
            'num_dropout': 2
        }
        # TRAINING AND SAVING MODEL
        multivariate_rnn_multi(selected_df, item_to_predict, **multiM_config)
Example #8
def run(texts):
    preprocessed = [prepare_data(text) for text in texts]
    tfidf_scores_transformed = calculate_tf_idf(preprocessed)
    tfidf_scores = calculate_tf_idf2(preprocessed)
    print('Are the two tf-idf scores the same?',
          check_tfidf_similarity(tfidf_scores_transformed[0], tfidf_scores[0]))
    print_results(tfidf_scores[0], tfidf_scores[1], len(preprocessed))
    print('-----------------------------------')
    print_results(tfidf_scores_transformed[0], tfidf_scores_transformed[1],
                  len(preprocessed))
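The helpers used above (prepare_data, calculate_tf_idf, calculate_tf_idf2, check_tfidf_similarity) are project-specific. A minimal sketch of the same two-route tf-idf comparison using only scikit-learn, purely for illustration, might look like this:

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer

texts = ["the cat sat", "the dog sat on the mat", "the cat chased the dog"]

# Route 1: raw counts followed by a separate tf-idf transform
counts = CountVectorizer().fit_transform(texts)
tfidf_a = TfidfTransformer().fit_transform(counts)

# Route 2: TfidfVectorizer does both steps at once
tfidf_b = TfidfVectorizer().fit_transform(texts)

print("Are the two tf-idf scores the same?",
      np.allclose(tfidf_a.toarray(), tfidf_b.toarray()))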
Example #9
    def recognize_and_display_result(self):
        '''
        Recognize the digit the user has drawn in the left-hand area and
        display the result in the area on the right.
        :return:
        '''
        self.reset_result_area()
        data = preprocessing.prepare_data(self.drawing_area)
        num = self.infer.inference_once(data)
        self.draw_num(num)
        # After the result is displayed, reset the drawing area for the user's next drawing
        self.to_reset_drawing_area = True
Example #10
def basic():
    train, test = preprocessing.prepare_data(True)

    with open('nn_resultsfeaturedrop_nosmote.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        # add header
        # vary each parameter of random forest
        split_train, split_labels = preprocessing.split_labels(train)
        split_train, split_labels = preprocessing.apply_smote(
            split_train, split_labels)

        nn_predict(split_train, split_labels, test, writer, {})
Example #11
def full_hyperparameter_tuning():
    # items_to_predict = ['Old_school_bond', 'Rune_platebody', 'Rune_2h_sword', 'Rune_axe',\
    # 	'Rune_pickaxe', 'Adamant_platebody', 'Amulet_of_power']
    items_to_predict = item_selection()
    items_to_predict = select_sorted_items(items_to_predict)
    min_features = 2
    max_features = 4
    for item_to_predict in items_to_predict:
        for num_features in range(min_features, max_features):
            # SELECT ITEMS
            items_selected = item_selection()

            # FEATURE EXTRACTION
            preprocessed_df = prepare_data(item_to_predict, items_selected)

            # FEATURE SELECTION & NORMALIZATION
            selected_df, pred_std, pred_mean = regression_f_test(
                preprocessed_df,
                item_to_predict,
                number_of_features=num_features)
            # print(selected_df.head())

            # define the grid search parameters
            batch_size = [16, 32, 64, 128]
            buffer_size = [30, 50, 100]
            epochs = [20, 40]
            eval_interval = [100, 400]
            num_dropout_layers = [1, 2, 3]
            num_lstm_units = [32, 64, 128]
            learning = [0.0001]
            past_history = [30, 50]

            # multivariate_rnn_multi_hyperparameter_tuning(selected_df, item_to_predict, eval_interval=eval_interval, \
            # 	learning=learning, past_history=past_history, epochs=epochs, num_lstm_units=num_lstm_units, batch_size=batch_size,\
            # 		 buffer_size=buffer_size, num_dropout_layers=num_dropout_layers)
            # multivariate_rnn_single_hyperparameter_tuning(selected_df, item_to_predict, eval_interval=eval_interval, \
            # 	learning=learning, past_history=past_history, epochs=epochs, num_lstm_units=num_lstm_units, batch_size=batch_size,\
            # 		buffer_size=buffer_size, num_dropout_layers=num_dropout_layers)
            # univariate_rnn_hyperparameter_tuning(selected_df, item_to_predict, batch_size = batch_size, epochs= epochs, \
            # 	past_history=past_history, num_lstm_units=num_lstm_units, eval_interval=eval_interval)

            multivariate_rnn_single_hyperparameter_tuning(
                selected_df, item_to_predict,
                num_lstm_units=[128], past_history=[30], eval_interval=[400],
                num_dropout_layers=[2], learning=[0.0001])
            # multivariate_rnn_multi_hyperparameter_tuning(selected_df, item_to_predict, \
            # 	num_lstm_units=num_lstm_units, past_history=past_history, eval_interval=eval_interval)
            # univariate_rnn_hyperparameter_tuning(selected_df, item_to_predict, \
            # 	past_history=range(30,50,5), num_lstm_units=[8], eval_interval=eval_interval)

            # univariate_rnn_hyperparameter_tuning(selected_df, item_to_predict)

            del selected_df
            del preprocessed_df
            gc.collect()
def basic():
    train, test = preprocessing.prepare_data(True)
    train = train.drop('Amount', axis=1)
    test = test.drop('Amount', axis=1)

    with open('nn_results.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        split_train, split_labels = preprocessing.split_labels(train)
        nn_predict(split_train, split_labels, test, writer)

        split_train, split_labels = preprocessing.apply_smote(
            split_train, split_labels)
        nn_predict(split_train, split_labels, test, writer)
Example #13
def basic():
    train, test = preprocessing.prepare_data()

    with open('final_results_basic.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        # add header
        writer.writerow([
            'class_weight', 'min_samples_split', 'n_estimators', 'vroc_auc',
            'vprecision', 'vrecall', 'vf1', 'vfp', 'vfn', 'troc_auc',
            'tprecision', 'trecall', 'tf1', 'tfp', 'tfn'
        ])
        # vary each parameter of random forest
        rf_predict(train, test, 'basic', writer)
Example #14
def smote_test():
    train, test = preprocessing.prepare_data()

    with open('final_results_smote.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        # add header
        writer.writerow([
            'max_depth', 'n_estimators', 'min_samples_split', 'class_weight',
            'max_features',  # comma needed here, otherwise it concatenates with 'vroc_auc'
            'vroc_auc', 'vprecision', 'vrecall', 'vf1', 'vfp', 'vfn',
            'troc_auc', 'tprecision', 'trecall', 'tf1', 'tfp', 'tfn'
        ])

        rf_predict(train, test, 'smote', writer)
def multiple_balanced_sets():
    train, test = preprocessing.prepare_data()
    train_list = preprocessing.multiple_balanced_samples(train, 5)
    # separate class label (last column)
    for i in range(5):
        train, labels = preprocessing.split_labels(train_list[i])
        classifier = linear_model.LogisticRegression()
        validation.cross_validate_set(classifier, train, labels)

    validation.cross_validate(classifier, train, labels)
    classifier.fit(train, labels)

    # test
    test, test_labels = preprocessing.split_labels(test)
    validation.test(classifier, test, test_labels)
Example #16
def create_data():
    questions, answers, vocab_size, tokenizer, start_tk, end_tk = preprocessing.prepare_data()
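    # Teacher forcing: decoder inputs are the answers minus the last token,
    # targets are the answers minus the first token.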
    dataset = tf.data.Dataset.from_tensor_slices(({
        'inputs': questions,
        'dec_inputs': answers[:, :-1]
    }, {
        'outputs': answers[:, 1:]
    }))
    dataset = dataset.cache()
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    print("\n\n\n", dataset)
    return dataset, (vocab_size, tokenizer, start_tk, end_tk)
Example #17
def main():

    ######### args parameters

    trg, src = load_pair(args.path)
    src_token = eng_tokenize(src)
    trg_token = es_tokenize(trg)

    ###############################################
    trg2idx, idx2trg = make_dictionary(trg_token)

    src2idx, idx2src = make_dictionary(src_token)

    src_convert = convert(word2idx=src2idx, idx2word=idx2src)

    trg_convert = convert(word2idx=trg2idx, idx2word=idx2trg)

    src_ix = src_convert.from_seq2idx(src_token)

    trg_ix = trg_convert.from_seq2idx(trg_token)

    train_loader, test_loader = prepare_data(src=src_ix,
                                             trg=trg_ix,
                                             test_size=args.test_size,
                                             batch_size=args.batch_size,
                                             y_vocab=trg2idx)

    # loss and optimizer setup
    loss_func = nn.CrossEntropyLoss(ignore_index=0)
    model = ConvS2S(src_size=len(src2idx),
                    tgt_size=len(trg2idx),
                    N=args.num_of_layer,
                    d_vector=512,
                    k_size=3,
                    device=device)
    optimizer = optim.Adam(model.parameters(), weight_decay=args.weight_decay)

    train(model=model,
          iterator=args.iterator,
          optimizer=optimizer,
          criterion=loss_func,
          train_loader=train_loader,
          test_loader=test_loader)
def basic():
    train, test = preprocessing.prepare_data(True)
    train = train.drop('Amount', axis=1)
    test = test.drop('Amount', axis=1)

    with open('nn_resultsepsilon_nosmote.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        # add header
        # vary each parameter of random forest
        num_layers_1 = 20
        num_layers_2 = 0
        num_layers_3 = 0
        alpha = 1.e-3
        for epsilon in [1.e0, 1.e-1, 1.e-2, 1.e-3, 1.e-4, 1.e-5]:
            if num_layers_2 == 0 and num_layers_3 != 0:
                continue
            split_train, split_labels = preprocessing.split_labels(train)
            params = {'epsilon': epsilon}
            nn_predict(split_train, split_labels, test, writer, params)
def basic():
    train, test = preprocessing.prepare_data(True)
    train = train.drop('Amount', axis=1)
    test = test.drop('Amount', axis=1)

    with open('nn_resultslayer2_nosmote.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        # add header
        # vary each parameter of random forest
        num_layers_1 = 20
        num_layers_2 = 0
        num_layers_3 = 0
        alpha = 1.e-3
        for num_layers_2 in range(1, 30):
            if num_layers_2 == 0 and num_layers_3 != 0:
                continue
            split_train, split_labels = preprocessing.split_labels(train)
            params = {'layer_2': num_layers_2}
            nn_predict(split_train, split_labels, test, writer, params)
def run(dataset, data_path, model_type, generations, populations):

    (df, features, label, categorical_features,
     sensitive_features) = prepare_data(dataset, data_path)
    X, y = process_categorical(df, features, label, categorical_features)
    (X_train, X_test, y_train, y_test) = prepare_data_split(X, y)

    # split_func = split_on_sensitive_attr(X_train)

    model = Classifier(
        dataset,
        model_type,
        X_train,
        y_train,
        X_test,
        y_test,
        features,
        sensitive_features,
    )
    model.fit()

    nsga_cfg = NSGAConfig(
        generations=generations,
        populations=populations,
        model_type=model_type,
        X_sensitive_a1=model.X_m,
    )

    X_m = model.X_m
    X_f = model.X_f
    y_m = model.y_m
    y_f = model.y_f

    X_test_m = model.X_test_m
    X_test_f = model.X_test_f
    y_test_m = model.y_test_m
    y_test_f = model.y_test_f
    try:
        run_nsga(nsga_cfg)
    except Exception as e:
        pass
Example #21
def encode_sentences(curr_model, pair, batch_size=128, test=False):
    """
    Encode sentences into the joint embedding space
    """
    en_feats = numpy.zeros((len(pair[0]), curr_model['options']['dim']), dtype='float32')
    cn_feats = numpy.zeros((len(pair[0]), curr_model['options']['dim']), dtype='float32')

    data_index = prepare_data(pair, curr_model['worddict'], test=test)
    cur = 0
    for en, cn, en_lengths, cn_lengths, en_index, cn_index in data_generator(data_index, batch_size):
        en, cn = curr_model['en_cn_model'].forward(en, en_lengths, en_index, cn, cn_lengths, cn_index)
        en = en.data.cpu().numpy()
        cn = cn.data.cpu().numpy()
        # copy each batch's embeddings into the preallocated feature matrices,
        # skipping padding rows in the final partial batch
        for i in range(batch_size):
            if i + cur >= len(pair[0]):
                break
            for j in range(curr_model['options']['dim']):
                en_feats[i + cur][j] = en[i][j]
                cn_feats[i + cur][j] = cn[i][j]
        cur += batch_size
    en_feats = Variable(torch.from_numpy(en_feats).cuda())
    cn_feats = Variable(torch.from_numpy(cn_feats).cuda())
    return en_feats, cn_feats
Example #22
        w = option == "-write"
        r = option == "-read"
        args = sys.argv[2:]

    # Load embeddings
    (word_ids, embeddings) = pickle.load(open(args[2], "rb"))

    # Load or pre-process data
    if r:
        train_data = pickle.load(open(args[0], "rb"))
        valid_data = pickle.load(open(args[1], "rb"))
    else:
        char_map = Numberer()
        label_map = Numberer()
        emolex = read_emolex(args[3])
        train_data = prepare_data(args[0], word_ids, emolex, char_map,
                                  label_map)
        valid_data = prepare_data(args[1], word_ids, emolex, char_map,
                                  label_map)
        if w:
            with open("traindata", "wb") as train_file:
                pickle.dump(train_data, train_file)
            with open("testdata", "wb") as test_file:
                pickle.dump(valid_data, test_file)

    # Get batches
    config = DefaultConfig()
    train_batches = generate_batches(*train_data, batch_size=config.batch_size)
    validation_batches = generate_batches(*valid_data,
                                          batch_size=config.batch_size)

    # Execute the model
Example #23
                 layers=[[H, H] for i in range(Lmax)],
                 n_start=n_start,
                 save_path="models/bifinn/N_{0}_H_{1}".format(N, H),
                 status=status,
                 logfile="BiFiNN_N_{0}_H_{1}.log".format(N, H))
 mbifinn = Modified_BiFiNN(z_shape=z_shape,
                           Lmax=Lmax,
                           layers=[[H, H] for i in range(Lmax)],
                           n_start=n_start,
                           save_path="models/mbifinn/N_{0}_H_{1}".format(
                               N, H),
                           status=status,
                           logfile="MBiFiNN_N_{0}_H_{1}.log".format(N, H))
 train_data, test_data = prepare_data("dataset.mat",
                                      L=Lmax,
                                      train_index=train_index,
                                      test_index=test_index,
                                      basis_index=basis_index)
 if status == "train":
     podnn.train(train_data,
                 batch_size=batch_size,
                 epochs=epochs,
                 verbose=verbose)
     mpodnn.train(train_data,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=verbose)
     bifinn.train(train_data,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=verbose)
BATCH_SIZE = 1
MINI_BATCHES = 9
EPOCHS = 5000
LEARNING_RATE = 1e-3

N_CELLS = 250
N_CLASSES = len(CLASSES)

print('Dataset Size: {}\n'.format(len(SOUND_FILE_PATHS)))

# If the data hasn't been preprocessed, then do it now.
if not os.path.exists(TF_RECORDS_META) and \
   not os.path.exists(TF_RECORDS_TRAIN) and \
   not os.path.exists(TF_RECORDS_TEST):
    FEATURES_MIN, FEATURES_MAX, FEATURES_MEAN = prepare_data(
        SOUND_FILE_PATHS, TF_RECORDS_DESTINATION, MAX_SAMPLES)
    with open(TF_RECORDS_META, 'w') as OUTPUT:
        OUTPUT.write('{},{},{}'.format(FEATURES_MIN, FEATURES_MAX,
                                       FEATURES_MEAN))
else:
    with open(TF_RECORDS_META, 'r') as INPUT:
        META_DATA = INPUT.readline()
        FEATURES_MIN, FEATURES_MAX, FEATURES_MEAN = [
            float(DATA_POINT) for DATA_POINT in META_DATA.split(',')
        ]

print('Training Set Size: {}'.format(int(len(SOUND_FILE_PATHS) * .9)))
print('Test Set Size: {}\n'.format(int(len(SOUND_FILE_PATHS) * .1)))


def variable_on_cpu(name, shape, initializer, dtype=tf.float32):
Example #25
        for i in range(self.Lmax):
            coeff_errors.append(
                compute_error(c_high[:, :(i + 1)], c_pred[i], scale=u_high))
            approx_errors.append(compute_error(u_high, u_pred[i],
                                               scale=u_high))
        return {
            "c": c_pred,
            "u": u_pred,
            "coeff_errors": coeff_errors,
            "approx_errors": approx_errors
        }

    def extend_data(self, data):
        new_data = copy.deepcopy(data)
        new_data["z"] = np.concatenate([data["z"], data["c_low"]], axis=1)
        return new_data


if __name__ == "__main__":
    from preprocessing import prepare_data
    bifinn = Modified_BiFiNN(z_shape=10,
                             Lmax=2,
                             layers=[[16, 16] for i in range(2)],
                             n_start=1)
    train_data, test_data = prepare_data("examples/example4/dataset.mat",
                                         L=2,
                                         train_index=range(500),
                                         test_index=range(500, 600),
                                         basis_index=range(600, 880))
    bifinn.train(train_data, batch_size=100, epochs=10, verbose=0)
    print(bifinn.load_and_test(test_data))
Example #26
                    default=config.tokenized_path,
                    help='path to the training data')

args = parser.parse_args()

path = args.path
token_path = args.token_path

articles, summaries, dic = read_files(path, token_path)
# for i in dic.word2idx.keys():
#    print(i, dic.word2idx[i])
# exit()
word_count = len(dic)
print('Number of unique words:', word_count)

art_idx = prepare_data(articles, dic)
sum_idx = prepare_summary(summaries, dic)

# hello = prepare_data(['my name is pasquale'], dic)
# unked_hello = get_unked(hello, dic)
# print(hello)
# print(unked_hello)
# exit()

#prepare TRAIN
train_path = 'train_all.txt'
valid_path = 'val_all.txt'
test_path = 'test_all.txt'
dic_path = 'dictionary'
out_path = 'data_finish/'
Example #27
def main():
    # Get the seconds since epoch
    current_timestamp = int(time.time())
    print("{} - predicting items".format(current_timestamp))

    model_types = ['uni', 'multiS', 'multiM']

    # SELECT ITEMS
    items_selected = item_selection(drop_percentage=0.5)
    items_to_predict = [
        'Amulet_of_strength', "Green_d'hide_vamb", 'Staff_of_fire', 'Zamorak_monk_top',
        'Staff_of_air', 'Adamantite_bar', 'Zamorak_monk_bottom', 'Adamant_platebody',
        'Runite_ore', 'Rune_scimitar', 'Rune_pickaxe', 'Rune_full_helm', 'Rune_kiteshield',
        'Rune_2h_sword', 'Rune_platelegs', 'Rune_platebody', 'Old_school_bond'
    ]

    preprocessed_df = None
    for item_to_predict in items_to_predict:
        # GET LIST OF FEATURES
        if not os.path.isfile('models/features/{}_{}_features.txt'.format(
                item_to_predict, model_types[0])):
            print(
                "Model for {} hasn't been created, please run models.py first."
                .format(item_to_predict))
            return
        specific_feature_list = []
        with open(
                'models/features/{}_{}_features.txt'.format(
                    item_to_predict, model_types[0]), 'r') as filehandle:
            specific_feature_list = json.load(filehandle)

        t0 = time.time()
        # FEATURE EXTRACTION
        preprocessed_df = prepare_data(item_to_predict, items_selected,
                                       DATA_FOLDER="data/rsbuddy/",
                                       reused_df=preprocessed_df,
                                       specific_features=specific_feature_list)

        t1 = time.time()
        # FEATURE SELECTION & NORMALIZATION
        selected_df, pred_std, pred_mean = regression_f_test(
            preprocessed_df, item_to_predict,
            specific_features=specific_feature_list,
            number_of_features=len(specific_feature_list) - 1)

        t2 = time.time()
        predictions = []
        for model_type in model_types:
            # LOADING AND APPLYING MODEL
            loaded_model = tf.keras.models.load_model(
                'models/{}_{}_model.h5'.format(item_to_predict, model_type))

            if (model_type == 'uni'):
                result = apply_univariate(selected_df, item_to_predict,
                                          loaded_model, pred_std, pred_mean)
            elif (model_type == 'multiS'):
                result = apply_multivariate_single_step(
                    selected_df, item_to_predict, loaded_model, pred_std,
                    pred_mean)
            elif (model_type == 'multiM'):
                result = apply_multivariate_multi_step(selected_df,
                                                       item_to_predict,
                                                       loaded_model, pred_std,
                                                       pred_mean)
            else:
                print("Unrecognized model type.")

            predictions.extend(result)
        tf.keras.backend.clear_session()

        t3 = time.time()

        print(
            'TIME LOG - preprocessing: {}, feature selection: {}, prediction: {}'
            .format(t1 - t0, t2 - t1, t3 - t2))

        new_predictions = [int(i) for i in predictions]
        print('item: {}, pred: {}'.format(item_to_predict, new_predictions))

        if os.path.isfile('data/predictions/{}.csv'.format(item_to_predict)):
            appendToCSV(item_to_predict, new_predictions, current_timestamp)
        else:
            writeToCSV(item_to_predict, new_predictions, current_timestamp)
def smote_test():
    train, test = preprocessing.prepare_data()
    lr_predict(train, test, 'smote')
def basic():
    train, test = preprocessing.prepare_data()
    lr_predict(train, test, 'basic')
Example #30
        train_losses[i] = train_loss
        test_losses[i] = test_loss

        dt = datetime.now() - t0
        print(
            f'epoch {i+1}/{epochs} | train_loss: {train_loss:.4f} | test_loss: {test_loss:.4f} | duration: {dt}'
        )

    return train_losses, test_losses


# main
if __name__ == '__main__':

    # initialise
    train_images, train_labels, test_images, test_labels, train_positives, train_negatives, test_positives, test_negatives = prepare_data(
        datapath, H, W)
    model = SiameseModel(feature_dim)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters())

    train_steps = int(np.ceil(len(train_positives) / batch_size))
    test_steps = int(np.ceil(len(test_positives) / batch_size))

    # training loop
    train_losses, test_losses = train(model,
                                      contrastive_loss,
                                      optimizer,
                                      run_generator(train_positives,
                                                    train_negatives,
                                                    train_images),
def smote_test():
    train, test = preprocessing.prepare_data()
    train = preprocessing.undersample_negative_class(train, 5000)
    svm_predict(train, test, 'smote')
def basic():
    train, test = preprocessing.prepare_data()
    train = preprocessing.undersample_negative_class(train, 1000)
    svm_predict(train, test, 'basic')