def generate():
    page_count = get_page_count()
    page_no = 1
    game_list = []
    while page_no <= page_count:
        print(f"__XBOX__ Page No: {page_no}")
        try:
            url = BASE_URL + str(page_no)
            page = requests.get(url)
            soup = BeautifulSoup(page.content, 'html.parser')
            games = soup.find_all('a', class_='Game')
            for game in games:
                game_url = f"{MARKET_PLACE}{game['href']}"
                game_title, price = get_game_details(game_url)
                game_detail = {'title': game_title, 'price': price, 'url': game_url}
                # print(game_title, price)
                game_list.append(game_detail)
            page_no += 1
        except Exception as e:
            print(e)
            sleep(5)
            print(f'failed on page no {page_no}, retrying...')

    # sort by price
    game_list = sorted(game_list, key=itemgetter('price'))
    utils.generate_csv('xbox', game_list, ['title', 'price', 'url'])
    utils.generate_html('xbox', game_list)
def generate():
    page_count = 10
    page_num = 1
    game_list = []
    while page_num <= page_count:
        print(f"\n__PS4__ Page No: {page_num}\n")
        try:
            url = f"{PS_STORE}/{PS4_STORE}/{page_num}?{STORE_FILTERS}"
            page = requests.get(url)
            soup = BeautifulSoup(page.content, "html.parser")
            if page_num == 1:
                page_count = get_page_count(soup)
            games = soup.find_all("div", class_="grid-cell--game")
            for game in games:
                game_list.append(get_game_details(game))
            page_num += 1
            sleep(1)
            # break
        except Exception as e:
            print(e)
            sleep(5)
            print(f"failed on page no {page_num}, retrying...")

    # sort by price
    game_list = sorted(game_list, key=itemgetter("price"))
    utils.generate_csv("ps4", game_list, ["title", "price", "url"])
    utils.generate_html("ps4", game_list)
def generate():
    category_names = CATEGORIES.keys()
    store_names = STORES.keys()
    for store_name in store_names:
        for category_name in category_names:
            print(f'__CEX__ {store_name} => {category_name}')
            category_id = CATEGORIES[category_name]
            store_id = STORES[store_name]
            games = get_games(category_id, store_id)
            if games:
                utils.generate_csv(f'cex-{store_name}-{category_name}', games, ['title', 'price', 'url'])
                utils.generate_html(f'cex-{store_name}-{category_name}', games)
def inference():
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    save_file = input("save model name : ")
    try:
        if torch.cuda.is_available():
            model = torch.load(save_file, map_location={"cpu": "cuda:0"})
        else:
            model = torch.load(save_file, map_location={"cuda:0": "cpu"})
        print("Success loading model")
    except IOError:
        print("Couldn't find model")
        sys.exit(0)

    print("best epoch was {}".format(model.info_dict['epoch']))

    # 1783 : length of test data set
    test_data_loader = dataloader.DataLoader(1783, test=True)

    model.eval()
    with torch.no_grad():
        X, _ = test_data_loader.get_batch()
        X = X.to(device)
        output = model(X)
    utils.generate_csv(output)
def index():
    template = 'index.html'
    form = request.form
    data = {}
    if len(form):
        submit = form.get('form.button.submit', '')
        website_url = form.get('website_url', '')
        if not website_url:
            return render_template(template)
        timeout = int(form.get('timeout', '5'))
        depth = int(form.get('depth', '0'))
        csv_data = form.get('csv_data', '')
        if submit == 'csv':
            if csv_data:
                return csv_response(csv_data)
            return render_template(template)
        result = return_error_pages(
            site_links=[website_url],
            config={
                'depth': depth,
                'timeout': timeout,
                'workers': 12,
                'mode': 'process',
                'progress': True,
                'test-outside': True,
            },
        )
        if result:
            result = check_redirects(result)
            csv_data = generate_csv(result)
            data['errors'] = result
            data['website_url'] = website_url
            data['timeout'] = timeout
            data['depth'] = depth
            data['csv_data'] = csv_data
    return render_template(template, **data)
import os, sys, argparse
import importlib

import numpy as np
from sklearn.decomposition import PCA

import utils


def calc_dist(model, latents):
    return np.mean((model.inverse_transform(model.transform(latents)) - latents) ** 2, axis=1)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('latents_file', nargs='+')
    parser.add_argument('output_file', help='predicted file')
    args = parser.parse_args()

    latents_path = args.latents_file
    output_file = args.output_file

    latents = np.concatenate([np.load(path) for path in latents_path], axis=0)
    latents = latents.reshape(latents.shape[0], -1)
    print(f'\033[32;1mlatents: {latents.shape}\033[0m')

    model = PCA(2, random_state=880301).fit(latents)
    dist = calc_dist(model, latents)
    utils.generate_csv(dist, output_file)
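# Illustrative sketch (not part of the original script): calc_dist above is the
# per-sample PCA reconstruction error, used here as an anomaly score. The toy
# array shape and random seed below are assumptions made only for this demo.
import numpy as np
from sklearn.decomposition import PCA

_toy = np.random.RandomState(0).randn(100, 8)
_pca = PCA(2).fit(_toy)
_err = np.mean((_pca.inverse_transform(_pca.transform(_toy)) - _toy) ** 2, axis=1)
assert _err.shape == (100,)  # one reconstruction-error score per sample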
import os
import argparse

import numpy as np

import utils


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('ensemble_result')
    parser.add_argument('sources', nargs='+', help='npy files to be ensembled')
    args = parser.parse_args()

    ensemble_result = args.ensemble_result
    sources = args.sources

    f = np.load(sources[0]).ravel().astype(np.float32)
    for name in sources[1:]:
        f += np.load(name).ravel()
    f /= len(sources)
    utils.generate_csv(f, ensemble_result)
input_shape = (32, 32)
latents = np.concatenate([np.load(path) for path in latents_path], axis=0)
latents = latents.reshape(latents.shape[0], -1)
print(f'\033[32;1mlatents: {latents.shape}\033[0m')

np.random.seed(880301)
if transform_function not in globals():
    globals()[transform_function] = getattr(
        importlib.import_module(transform_function[:transform_function.rfind('.')]),
        transform_function.split('.')[-1])

if n_clusters is not None:
    model, transformedX, pred = GeneralClustering(
        globals()[transform_function](seed, n_clusters)).fit_transform(latents)
else:
    model, transformedX, pred = GeneralClustering(
        globals()[transform_function](seed)).fit_transform(latents)
utils.save_model(model_path, model)

dist = calc_dist(model, transformedX, pred)
if test:
    if ensemble:
        np.save(test, dist)
    else:
        utils.generate_csv(dist, test)
else:
    print(f'\033[32;1mValidation score: {np.mean(dist)}\033[0m')
test = args.test

if training:
    trainX, trainY, _, _ = utils.load_train_data(train_file[0], train_file[1], normalize=False)
    trainX = np.matrix(trainX[:, 1:])  # remove bias coefficient
    print(f'\033[32;1mtrainX: {trainX.shape}, trainY: {trainY.shape}\033[0m')

    mu0, mu1 = np.mean(trainX[(trainY == 0).ravel()], axis=0).T, np.mean(trainX[(trainY == 1).ravel()], axis=0).T
    cov = np.matrix(np.sum(trainY) / trainY.shape[0] * np.cov(trainX[(trainY == 0).ravel()].T)
                    + (1 - np.sum(trainY) / trainY.shape[0]) * np.cov(trainX[(trainY == 1).ravel()].T))
    u, s, v = np.linalg.svd(cov, full_matrices=False)
    cov_I = np.matmul(v.T * 1 / s, u.T)

    w = cov_I.T * (mu1 - mu0)
    b = -0.5 * mu1.T * cov_I * mu1 + 0.5 * mu0.T * cov_I * mu0 + np.log(np.sum(trainY) / (trainY.shape[0] - np.sum(trainY)))
    np.save(model_path, [w, b])
else:
    w, b = np.load(model_path, allow_pickle=True)

if test:
    testX = utils.load_test_data(test[0])
    testX = np.matrix(testX[:, 1:])
    utils.generate_csv(f(testX, w, b), test[1])
else:
    if not training:
        trainX, trainY, _, _ = utils.load_train_data(train_file[0], train_file[1], normalize=False)
        trainX = np.matrix(trainX[:, 1:])  # remove bias coefficient
    print(f'loss: {loss(trainX, trainY, w, b):.5}, acc: {accuracy(trainX, trainY, w, b):.4}')
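# Note: f above is not defined in this fragment. A minimal sketch of what it
# presumably computes, given the generative (Gaussian, shared-covariance) model
# fit above: the class-1 probability sigmoid(X w + b), thresholded at 0.5. The
# name and the rounding behaviour are assumptions, not taken from the original.
def f_sketch(X, w, b):
    z = np.array(X * w + b).ravel()        # decision value per sample
    prob = 1 / (1 + np.exp(-z))            # sigmoid -> P(class 1 | x)
    return (prob > 0.5).astype(np.int32)   # hard 0/1 prediction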
            print(
                f'epoch {epoch + 100:04}, loss: {rmse(trainX, trainY, w):.5}, valid_loss: {rmse(validX, validY, w):.5}'
            )
        else:
            print(
                f'epoch {epoch + 100:04}, loss: {rmse(trainX, trainY, w):.5}'
            )

    a = w[1:].reshape(-1, hr)
    for i in a:
        print(('%.3f ' * hr) % tuple(i))
    np.save(model_path, w)
else:
    w = np.load(model_path)
    mean, std = np.load(model_path[:model_path.rfind('.npy')] + '_mean.npy'), np.load(
        model_path[:model_path.rfind('.npy')] + '_std.npy')

if test:
    testX = utils.load_test_data(test[0], mean, std)
    utils.generate_csv(testX @ w, test[1])
else:
    if not training:
        trainX, trainY, mean, std = utils.load_train_data(train_file, hr)
        if split_ratio > 0:
            trainX, validX, trainY, validY = utils.train_test_split(trainX, trainY, 0.1)
    print(f'Training loss: {rmse(trainX, trainY, w)}')
    if split_ratio > 0:
        print(f'Validation loss: {rmse(validX, validY, w)}')
def main():
    parser = argparse.ArgumentParser(description='Classifier using triplet loss.')
    parser.add_argument('--CVDs', type=str, default='0,1,2,3', metavar='CUDA_VISIBLE_DEVICES',
                        help='CUDA_VISIBLE_DEVICES')
    parser.add_argument('--train-set', type=str,
                        default='/home/zili/memory/FaceRecognition-master/data/mnist/train',
                        metavar='dir', help='path of train set.')
    parser.add_argument('--test-set', type=str,
                        default='/home/zili/memory/FaceRecognition-master/data/mnist/test',
                        metavar='dir', help='path of test set.')
    parser.add_argument('--train-set-csv', type=str,
                        default='/home/zili/memory/FaceRecognition-master/data/mnist/train.csv',
                        metavar='file', help='path of train set.csv.')
    parser.add_argument('--test-set-csv', type=str,
                        default='/home/zili/memory/FaceRecognition-master/data/mnist/test.csv',
                        metavar='file', help='path of test set.csv.')
    parser.add_argument('--num-triplet', type=int, default=10000, metavar='N',
                        help='number of triplets in dataset (default: 10000)')
    parser.add_argument('--train-batch-size', type=int, default=256, metavar='N',
                        help='input batch size for training (default: 256)')
    parser.add_argument('--test-batch-size', type=int, default=512, metavar='N',
                        help='input batch size for testing (default: 512)')
    parser.add_argument('--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--embedding-size', type=int, default=256, metavar='N',
                        help='embedding size of model (default: 256)')
    parser.add_argument('--lr', type=float, default=0.05, metavar='LR',
                        help='learning rate (default: 0.05)')
    parser.add_argument('--margin', type=float, default=1.0, metavar='margin',
                        help='loss margin (default: 1.0)')
    parser.add_argument('--kneighbor', type=int, default=20, metavar='N',
                        help='how many neighbors in testing')
    parser.add_argument('--num-classes', type=int, default=10, metavar='N',
                        help='number of classes in dataset')
    parser.add_argument('--momentum', type=float, default=0.8, metavar='M',
                        help='SGD momentum (default: 0.8)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=4, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--model-name', type=str, default='resnet34', metavar='M',
                        help='model name (default: resnet34)')
    parser.add_argument('--dropout-p', type=float, default=0.2, metavar='D',
                        help='Dropout probability (default: 0.2)')
    parser.add_argument('--check-path', type=str, default='checkpoints3', metavar='C',
                        help='Checkpoint path')
    parser.add_argument('--is-semihard', type=bool, default=True, metavar='R',
                        help='whether the dataset is selected in a semi-hard way.')
    parser.add_argument('--is-pretrained', type=bool, default=False, metavar='R',
                        help='whether the model is pretrained.')
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = args.CVDs

    output1 = 'main' + str(datetime.datetime.now())
    f = open(args.check_path + os.path.sep + output1 + '.txt', 'w+')
    l2_dist = PairwiseDistance(2)
    writer = SummaryWriter()

    print('Loading model...')
    model = FaceModel(embedding_size=args.embedding_size,
                      num_classes=args.num_classes,
                      pretrained=args.is_pretrained)
    f.write(" model: {}".format(model.model) + '\r\n')

    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model).cuda()
        cudnn.benchmark = True

    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=1e-5)
    # optimizer = optim.Adam(model.parameters(), lr=args.lr)

    print('start training...')
    features, labels, clf = feature(model, args)
    for epoch in range(args.epochs):
        if epoch % 5 == 0:
            file_operation(f, args, optimizer)
        if (epoch + 1) % 2 == 0:
            args.lr = args.lr / 3
            update_lr(optimizer, args.lr)

        generate_csv(args)
        train(epoch, model, optimizer, args, f, writer, features)
        features, labels, clf = feature(model, args)
        validate(epoch, model, clf, args, f, writer)
        f.write('\r\n')

    torch.save(model, args.check_path + os.path.sep + output1 + '.pkl')
              callbacks=[checkpoint, reduce_lr, logger, tensorboard])
else:
    print('\033[32;1mLoading Model\033[0m')
    model.load_weights(model_path)

if test:
    testX = utils.load_data(test[0], word2idx_en, max_seq_len, label=False)
    pred = predict(encoder, decoder, testX, word2idx_cn, beam_search=beam_search_enabled)
    if ensemble:
        np.save(test[1], pred)
    else:
        utils.generate_csv(pred, idx2word_cn, test[1])
else:
    if not training:
        trainX, trainY_decoder_in, trainY, trainY_raw = utils.load_data(
            os.path.join(data_dir, 'training.txt'), word2idx_en, max_seq_len,
            label=True, word2idx_Y=word2idx_cn)
        validX, validY_decoder_in, validY, validY_raw = utils.load_data(
            os.path.join(data_dir, 'validation.txt'), word2idx_en, max_seq_len,
            label=True, word2idx_Y=word2idx_cn)
    print(
                                                  train_file[1], normalize=False)
trainX, validX, trainY, validY = utils.train_test_split(trainX, trainY, 0.1)
print(
    f'\033[32;1mtrainX: {trainX.shape}, trainY: {trainY.shape}, validX: {validX.shape}, validY: {validY.shape}\033[0m'
)

if training:
    model = GradientBoostingClassifier(
        learning_rate=0.1, n_estimators=200, max_depth=3,
        random_state=880301)  # , n_iter_no_change=10, tol=1e-4)
    model.fit(trainX, trainY.ravel())
    utils.save_model(model_path, model)
    # a = model.feature_importances_[1:].reshape(-1, 9)
    # for i in a:
    #     print(('%.3f ' * 9) % tuple(i))
else:
    model = utils.load_model(model_path)

if test:
    testX = utils.load_test_data(test[0], mean, std)
    utils.generate_csv(model.predict(testX), test[1])
else:
    print(f'\033[32;1mTraining score: {model.score(trainX, trainY)}\033[0m')
    print(f'\033[32;1mValidation score: {model.score(validX, validY)}\033[0m')
train_file = args.training_file
training = not args.no_training
test = args.test

if training:
    trainX, trainY, mean, std = utils.load_train_data(train_file, 9)
    trainX, trainY = np.matrix(trainX), np.matrix(trainY)
    print(f'\033[32;1mtrainX: {trainX.shape}, trainY: {trainY.shape}\033[0m')
    np.save('mean_best.npy', mean)
    np.save('std_best.npy', std)

    w = (trainX.T * trainX).I * (trainX.T * trainY)
    # a = np.array(w)[1:].reshape(-1, 9)
    # for i in a:
    #     print(('%.3f ' * 9) % tuple(i))
    # print(w.shape)
    np.save(model_path, w)
else:
    w = np.load(model_path)
    mean, std = np.load('mean_best.npy'), np.load('std_best.npy')

if test:
    testX = np.matrix(utils.load_test_data(test[0], mean, std))
    utils.generate_csv(np.array(testX * w), test[1])
else:
    if not training:
        trainX, trainY, mean, std = utils.load_train_data(train_file, 9)
        trainX, trainY = np.matrix(trainX), np.matrix(trainY)
    print(f'Training loss: {rmse(trainX, trainY, w)}')
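# Note: rmse above is defined elsewhere in the project. A minimal sketch of the
# usual definition for this closed-form linear regression, assuming X, Y and w
# are np.matrix objects as above (the helper name is hypothetical):
def rmse_sketch(X, Y, w):
    return np.sqrt(np.mean(np.square(np.array(X * w - Y))))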
              verbose=1,
              callbacks=[checkpoint, reduce_lr])
else:
    print('\033[32;1mLoading Model\033[0m')
    model.load_weights(model_path)

if test:
    testX = utils.load_test_data(data_dir, input_shape, normalize=normalize, preprocessing=True)
    pred = model.predict(testX)
    if ensemble:
        np.save(test, pred)
    else:
        utils.generate_csv(pred, test)
else:
    if not training:
        trainX, trainY = utils.load_train_data(data_dir, input_shape, preprocessing=True)
        trainX, validX, trainY, validY = utils.train_test_split(
            trainX, trainY, split_ratio=0.1, seed=seed)
        print(
            f'\033[32;1mtrainX: {trainX.shape}, trainY: {trainY.shape}, validX: {validX.shape}, validY: {validY.shape}\033[0m'
        )
    print(
        f'\033[32;1mTraining score: {model.evaluate(trainX, trainY, batch_size=128, verbose=0)}\033[0m'
    )
    print(
        f'\033[32;1mValidation score: {model.evaluate(validX, validY, batch_size=128, verbose=0)}\033[0m'
          epochs=100,
          verbose=2,
          callbacks=[checkpoint, reduce_lr])
else:
    print('\033[32;1mLoading Model\033[0m')
    model.load_weights(model_path)

if test:
    testX = np.load(test[0])
    if test[1][-4:] == '.npy':
        pred = encoder.predict(testX, batch_size=512)
        np.save(test[1], pred[0] if isinstance(pred, (list, tuple)) else pred)
    else:
        pred = model.predict(testX)
        utils.generate_csv(np.sum((pred - testX) ** 2, axis=(1, 2, 3)), test[1])
elif output_image:
    testX = np.load(output_image[0])
    pred = model.predict(testX[:10])
    testX = np.concatenate(testX[:10] * 128 + 128, axis=1).astype(np.uint8)
    pred = np.concatenate(pred * 128 + 128, axis=1).astype(np.uint8)
    cv2.imwrite(output_image[1], np.concatenate([testX, pred], axis=0))
else:
    if not training:
        trainX = np.load(trainX_path)
        trainX, validX = utils.train_test_split(trainX, split_ratio=0.1)
        print(
            f'\033[32;1mtrainX: {trainX.shape}, validX: {validX.shape}\033[0m'
        )
    print(
                                  4,
                                  verbose=1,
                                  min_lr=1e-6)
    # logger = CSVLogger(model_path + '.csv')
    # tensorboard = TensorBoard(model_path[:model_path.rfind('.')] + '_logs', histogram_freq=1, batch_size=1024, write_grads=True, update_freq='epoch')
    model.fit(trainX, trainY,
              batch_size=128,
              epochs=100,
              validation_data=(validX, validY),
              verbose=1,
              callbacks=[checkpoint, reduce_lr])
else:
    print('\033[32;1mLoading Model\033[0m')
    model.load_weights(model_path)

if test:
    testX = utils.load_test_data(test[0], mean, std)
    pred = model.predict(testX)
    if ensemble:
        np.save(test[1], pred)
    else:
        utils.generate_csv(pred, test[1])
else:
    print(
        f'\033[32;1mTraining score: {model.evaluate(trainX, trainY, batch_size=256, verbose=0)}\033[0m'
    )
    print(
        f'\033[32;1mValidation score: {model.evaluate(validX, validY, batch_size=256, verbose=0)}\033[0m'
    )
                    help='testing file and the predicted file')
args = parser.parse_args()

model_path = args.model_path
train_file = args.training_file
training = not args.no_training
test = args.test

trainX, trainY, mean, std = utils.load_train_data(train_file[0], train_file[1])
trainY = (trainY * 2 - 1).astype(np.int32).ravel()
trainX, validX, trainY, validY = utils.train_test_split(trainX, trainY)
print(
    f'\033[32;1mtrainX: {trainX.shape}, trainY: {trainY.shape}, validX: {validX.shape}, validY: {validY.shape}\033[0m'
)

if training:
    T = 32
    clf = RandomForest(T).fit(trainX, trainY, max_height=9)
    utils.save_model(model_path, clf)
else:
    clf = utils.load_model(model_path)

if test:
    testX = utils.load_test_data(test[0], mean, std)
    utils.generate_csv((clf.predict(testX) + 1) / 2, test[1])
else:
    print(f'\033[32;1mTraining score: {clf.score(trainX, trainY)}\033[0m')
    print(f'\033[32;1mValidation score: {clf.score(validX, validY)}\033[0m')