def preprocess(voc_path, txt_path):
    """Binarize a text corpus against a fixed vocabulary.

    Reads the vocabulary at ``voc_path``, indexes the sentences in
    ``txt_path`` and writes the binarized data next to the corpus as
    ``<txt_path>.pth``.  Logs corpus statistics and, when there are few
    enough, each unknown word with its count.
    """
    assert os.path.isfile(voc_path)
    assert os.path.isfile(txt_path)

    logger = create_logger(None, 0)
    bin_path = txt_path + ".pth"
    dico = Dictionary.read_vocab(voc_path)
    logger.info("")
    data = Dictionary.index_data(txt_path, bin_path, dico)

    # one EOS index is stored per sentence, hence words = sentences - positions
    n_words = len(data['sentences']) - len(data['positions'])
    n_sentences = len(data['positions'])
    unk_words = data['unk_words']

    logger.info("%i words (%i unique) in %i sentences."
                % (n_words, len(data['dico']), n_sentences))

    if len(unk_words) > 0:
        n_unk = sum(unk_words.values())
        logger.info(
            "%i unknown words (%i unique), covering %.2f%% of the data."
            % (n_unk, len(unk_words), n_unk * 100. / n_words))
        if len(unk_words) < 30:
            # most frequent unknown words first
            for word, count in reversed(sorted(unk_words.items(), key=lambda kv: kv[1])):
                logger.info("%s: %i" % (word, count))
def main(args):
    """CLI entry point: parse ``-i/--input`` and ``-o/--output`` and run the Validator.

    NOTE(review): the ``args`` parameter is ignored — options are read from
    ``sys.argv`` directly, and the local ``args`` from getopt shadows it.
    """
    logger = create_logger(LOG_FILE, LOG_LEVEL)
    logger.info("Geofencing validator logger was created.")

    input_file, output_file = None, None
    try:
        opts, args = getopt.getopt(sys.argv[1:], "i:o:", ["input=", "output="])
    except getopt.GetoptError as exc:
        logger.error(
            "Got an error: \"{}\", while trying to get options".format(exc))
        sys.exit(2)

    for flag, value in opts:
        if flag in ("-i", "--input"):
            input_file = value
        elif flag in ("-o", "--output"):
            output_file = value
        else:
            # getopt restricts flags to the declared set, but keep the guard
            logger.error("Incorrect parameter was set")
            sys.exit(usage)

    if not (input_file and output_file):
        logger.error("No input csv file or output file path in parameter")
        sys.exit(usage)

    Validator(input_file, output_file).process()
def initialize_exp(params, *args, dump_params=True):
    """
    Initialize the experience:
    - dump parameters
    - create checkpoint repo
    - create a logger
    - create a panda object to keep track of the training statistics

    Returns (logger, training_stats).
    """

    # dump parameters
    # FIX: use a context manager so the params.pkl handle is closed instead
    # of being leaked by pickle.dump(params, open(...))
    if dump_params:
        with open(os.path.join(params.dump_path, "params.pkl"), "wb") as f:
            pickle.dump(params, f)

    # create repo to store checkpoints
    params.dump_checkpoints = os.path.join(params.dump_path, "checkpoints")
    # only rank 0 creates the directory, to avoid a race between workers
    if not params.rank and not os.path.isdir(params.dump_checkpoints):
        os.mkdir(params.dump_checkpoints)

    # create a panda object to log loss and acc
    training_stats = PD_Stats(
        os.path.join(params.dump_path, "stats" + str(params.rank) + ".pkl"), args
    )

    # create a logger
    logger = create_logger(
        os.path.join(params.dump_path, "train.log"), rank=params.rank
    )
    logger.info("============ Initialized logger ============")
    logger.info(
        "\n".join("%s: %s" % (k, str(v)) for k, v in sorted(dict(vars(params)).items()))
    )
    logger.info("The experiment will be stored in %s\n" % params.dump_path)
    logger.info("")
    return logger, training_stats
def create_app():
    """Application factory: build the Flask app, attach the admin panel
    and register the Telegram webhook routes."""
    app = Flask(__name__)
    app.config.from_object(config)
    create_logger()

    # admin panel exposing a single Teams model view
    admin.init_app(app)
    admin.add_view(ModelView(Teams, session))

    @app.route("/" + config.BOT_API_TOKEN, methods=['POST'])
    def get_message():
        # Telegram POSTs updates here; decode and hand them to the dispatcher
        raw_update = request.stream.read().decode("utf-8")
        bot.process_new_updates([telebot.types.Update.de_json(raw_update)])
        return "!", 200

    @app.route("/")
    def webhook():
        # visiting the root (re)registers the webhook with Telegram
        bot.remove_webhook()
        bot.set_webhook(url=f'{config.APP_HOST}/{config.BOT_API_TOKEN}')
        return 'Hook was set!', 200

    return app
# Translation script setup: logging, GPU selection, CLI definition.
import argparse
import torch
from src.logger import create_logger
import os
from src.model import build_mt_model
from src.data.loader import load_data
import subprocess
import re

logger = create_logger('translate.log')
# pin the process to GPU 2 before any CUDA context is created
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

parser = argparse.ArgumentParser(description='Settings')
# (flag, type, default, help) for every scalar CLI option
_ARG_SPECS = [
    ("--train_data", str, 'data/120w.bin', "train data dir"),
    ("--max_len", int, 50, "max length of sentences"),
    ("--reload_model", str, '', "reload model"),
    ("--batch_size", int, 64, "batch size"),
    ("--batch_size_tokens", int, -1, "batch size tokens"),
    ("--src_n_words", int, 0, "data"),
]
for _flag, _type, _default, _help in _ARG_SPECS:
    parser.add_argument(_flag, type=_type, default=_default, help=_help)
# LICENSE file in the root directory of this source tree. # """ Example: python data/vocab.txt data/train.txt vocab.txt: 1stline=word, 2ndline=count """ import os import sys from src.logger import create_logger from src.data.dictionary import Dictionary if __name__ == "__main__": logger = create_logger(None, 0) voc_path = sys.argv[1] txt_path = sys.argv[2] bin_path = sys.argv[2] + ".pth" assert os.path.isfile(voc_path) assert os.path.isfile(txt_path) dico = Dictionary.read_vocab(voc_path) logger.info("") data = Dictionary.index_data(txt_path, bin_path, dico) logger.info("%i words (%i unique) in %i sentences." % ( len(data["sentences"]) - len(data["positions"]), len(data["dico"]), len(data["positions"]),
# NOTE(review): this chunk starts mid-statement — the lines below are the
# trailing keyword arguments of a parser.add_argument("--verbose", ...)
# call whose opening is outside the visible source.
                    type=int, default=2, help="verbose level (2:debug, 1:info, 0:warning)")
parser.add_argument("--log", default="log", type=str, help="the log file")
parser.add_argument("--n", type=int, default=0, help="How many should we add?")
parser.add_argument("--portion", type=float, default=0.5, help="Portion of data points for testing.")
parser.add_argument("--test_id_options_num", type=int, default=10, help="Number of train/test split.")
parser.add_argument("--sample_options_num", type=int, default=10, help="Number of samples for n data points.")
params = parser.parse_args()
# file logger at --log path, verbosity controlled by --verbose
logger = create_logger(params.log, vb=params.verbose)
init_logging()
task = params.task
n = params.n
# run the experiment with an xgboost regressor on the chosen task
# NOTE(review): "xgboost" is hard-coded here; confirm that is intended
run_ex(task, n, "xgboost", params.portion)
def setup_data_and_model(params, model):
    """Prepare a training run and return all of its moving parts as one tuple.

    In order: seeds RNGs, creates the experiment directory and logger,
    loads the graph data, optionally restores model/optimizer state from
    disk ('latest' or 'best' checkpoints), moves the model to GPU unless
    local_cpu is set, builds the optimizer + LambdaLR scheduler, and
    optionally a SMILES generator and a tensorboard writer.

    Returns a 19-element tuple matching the final return statement; fields
    that are not applicable stay None (writer, perturbation_loader,
    generator, training_smiles).
    """
    # Variables that may not otherwise be assigned
    writer = perturbation_loader = generator = training_smiles = None

    # setup random seeds (validation seed defaults to the training seed)
    if params.val_seed is None:
        params.val_seed = params.seed
    set_seed_if(params.seed)

    exp_path = os.path.join(params.dump_path, params.exp_name)
    # create exp path if it doesn't exist
    if not os.path.exists(exp_path):
        os.makedirs(exp_path)

    # create logger (rank 0)
    logger = create_logger(os.path.join(exp_path, 'train.log'), 0)
    pp = pprint.PrettyPrinter()
    logger.info("============ Initialized logger ============")
    logger.info("Random seed is {}".format(params.seed))
    if params.suppress_params is False:
        logger.info("\n".join("%s: %s" % (k, str(v))
                              for k, v in sorted(dict(vars(params)).items())))
        logger.info("Running command: %s" % 'python ' + ' '.join(sys.argv))
    logger.info("The experiment will be stored in %s\n" % exp_path)
    logger.info("")

    # load data
    train_data, val_dataset, train_loader, val_loader = load_graph_data(params)
    logger.info('train_loader len is {}'.format(len(train_loader)))
    logger.info('val_loader len is {}'.format(len(val_loader)))

    # optionally initialize the binary-property embedding from a .npy file
    # (transposed — presumably stored feature-major; TODO confirm)
    if params.num_binary_graph_properties > 0 and params.pretrained_property_embeddings_path:
        model.binary_graph_property_embedding_layer.weight.data = \
            torch.Tensor(np.load(params.pretrained_property_embeddings_path).T)

    # pick which checkpoint (if any) to restore
    if params.load_latest is True:
        load_prefix = 'latest'
    elif params.load_best is True:
        load_prefix = 'best'
    else:
        load_prefix = None

    if load_prefix is not None:
        if params.local_cpu is True:
            model.load_state_dict(torch.load(os.path.join(exp_path, '{}_model'.format(load_prefix)),
                                             map_location='cpu'))
        else:
            model.load_state_dict(torch.load(os.path.join(exp_path, '{}_model'.format(load_prefix))))
    if params.local_cpu is False:
        model = model.cuda()

    # optional generator for sampling during training; keeps only the
    # training split of the SMILES file
    if params.gen_num_samples > 0:
        generator = GraphGenerator(train_data, model, params.gen_random_init,
                                   params.gen_num_iters,
                                   params.gen_predict_deterministically,
                                   params.local_cpu)
        with open(params.smiles_path) as f:
            smiles = f.read().split('\n')
        training_smiles = smiles[:int(params.smiles_train_split * len(smiles))]
        del smiles  # free the full list early

    opt = get_optimizer(model.parameters(), params.optimizer)
    # optimizer state must be restored AFTER the model is (possibly) on GPU
    if load_prefix is not None:
        opt.load_state_dict(torch.load(os.path.join(exp_path, '{}_opt_sd'.format(load_prefix))))
    lr = opt.param_groups[0]['lr']
    # LR schedule: warm-up then piecewise decay, implemented via LambdaLR
    lr_lambda = lambda iteration: lr_decay_multiplier(iteration, params.warm_up_iters,
                                                      params.decay_start_iter,
                                                      params.lr_decay_amount,
                                                      params.lr_decay_frac,
                                                      params.lr_decay_interval,
                                                      params.min_lr, lr)
    scheduler = LambdaLR(opt, lr_lambda)
    index_method = get_index_method()

    # sentinel "worst" validation loss; any real loss should beat it
    best_loss = 9999

    if params.tensorboard:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(exp_path)

    # resume iteration counter from params; gradient-accumulation counter starts at 0
    total_iter, grad_accum_iters = params.first_iter, 0

    return params, model, opt, scheduler, train_data, train_loader, val_dataset, val_loader, perturbation_loader,\
        generator, index_method, exp_path, training_smiles, pp, logger, writer, best_loss, total_iter,\
        grad_accum_iters
# Training script setup: logging and CLI definition.
import argparse
from src.logger import create_logger
import os
import torch

logger = create_logger('train.log')


def _bool_flag(s):
    """Parse a boolean CLI value.

    FIX: the original used ``type=bool``, a classic argparse pitfall —
    ``bool("False")`` is True, so any non-empty value (including "False")
    enabled the flag and it could never be turned off from the command
    line.  Accept the usual spellings explicitly and reject anything else.
    """
    if s.lower() in ("true", "1", "yes", "y"):
        return True
    if s.lower() in ("false", "0", "no", "n"):
        return False
    raise argparse.ArgumentTypeError("invalid boolean value: %r" % s)


parser = argparse.ArgumentParser(description='Settings')
parser.add_argument("--train_data", type=str, default='data/cwmt.bin', help="train data dir")
parser.add_argument("--max_len", type=int, default=100, help="max length of sentences")
parser.add_argument("--reload_model", type=str, default='', help="reload model")
parser.add_argument("--batch_size", type=int, default=80, help="batch size sentences")
parser.add_argument("--batch_size_tokens", type=int, default=4000, help="batch size tokens")
parser.add_argument("--src_n_words", type=int, default=0, help="data")
parser.add_argument("--tgt_n_words", type=int, default=0, help="data")
parser.add_argument("--dropout", type=float, default=0.1, help="Dropout")
parser.add_argument("--label-smoothing", type=float, default=0.1, help="Label smoothing")
parser.add_argument("--attention", type=_bool_flag, default=True, help="Use an attention mechanism")
parser.add_argument("--transformer", type=_bool_flag, default=True, help="Use Transformer")
parser.add_argument("--emb_dim", type=int, default=512, help="Embedding layer size")
def main(params):
    """Train an autoregressive Transformer on SMILES strings.

    Sets up the experiment directory/logger, loads data, builds the model
    and optimizer, then runs the training loop.  At the end of every epoch
    it evaluates perplexity/accuracy on the validation set, keeps the best
    checkpoint, and generates samples that are scored with the guacamol
    validity / uniqueness / KL-divergence benchmarks.

    Fixes vs. the original:
    - ``np.float('inf')`` -> ``float('inf')`` (the ``np.float`` alias was
      removed in NumPy 1.24 and raised AttributeError).
    - three bare ``except:`` clauses narrowed to ``except Exception:`` so
      KeyboardInterrupt/SystemExit are not swallowed.
    - SMILES files are opened with a context manager (no leaked handles).
    - the resume check inside the loop now uses the same truthiness test
      as the one that defines ``reloaded_iter`` (an empty-string
      ``load_path`` previously caused a NameError).
    """
    # setup random seeds
    set_seed(params.seed)
    params.ar = True
    exp_path = os.path.join(params.dump_path, params.exp_name)
    # create exp path if it doesn't exist
    if not os.path.exists(exp_path):
        os.makedirs(exp_path)
    # create logger
    logger = create_logger(os.path.join(exp_path, 'train.log'), 0)
    logger.info("============ Initialized logger ============")
    logger.info("Random seed is {}".format(params.seed))
    logger.info("\n".join("%s: %s" % (k, str(v))
                          for k, v in sorted(dict(vars(params)).items())))
    logger.info("The experiment will be stored in %s\n" % exp_path)
    logger.info("Running command: %s" % 'python ' + ' '.join(sys.argv))
    logger.info("")

    # load data
    data, loader = load_smiles_data(params)
    # reference SMILES for the KL-divergence benchmark
    if params.data_type == 'ChEMBL':
        with open(os.path.join(params.data_path, 'guacamol_v1_all.smiles'), 'r') as f:
            all_smiles_mols = f.readlines()
    else:
        with open(os.path.join(params.data_path, 'QM9_all.smiles'), 'r') as f:
            all_smiles_mols = f.readlines()
    train_data, val_data = data['train'], data['valid']
    dico = data['dico']
    logger.info('train_data len is {}'.format(len(train_data)))
    logger.info('val_data len is {}'.format(len(val_data)))

    # keep cycling through train_loader forever
    # stop when max iters is reached
    def rcycle(iterable):
        saved = []  # In-memory cache
        for element in iterable:
            yield element
            saved.append(element)
        while saved:
            random.shuffle(saved)  # Shuffle every batch
            for element in saved:
                yield element

    train_loader = rcycle(train_data.get_iterator(shuffle=True, group_by_size=True, n_sentences=-1))

    # extra param names for transformermodel
    params.n_langs = 1

    # build Transformer model
    model = TransformerModel(params, is_encoder=False, with_output=True)
    if params.local_cpu is False:
        model = model.cuda()
    opt = get_optimizer(model.parameters(), params.optimizer)
    # FIX: np.float was removed in NumPy 1.24; the builtin is equivalent here
    scores = {'ppl': float('inf'), 'acc': 0}
    if params.load_path:
        reloaded_iter, scores = load_model(params, model, opt, logger)

    for total_iter, train_batch in enumerate(train_loader):
        # FIX: use the same truthiness check that guarded load_model above;
        # `is not None` crashed with NameError when load_path was ''
        if params.load_path:
            total_iter += reloaded_iter + 1
        epoch = total_iter // params.epoch_size
        if total_iter == params.max_steps:
            logger.info("============ Done training ... ============")
            break
        elif total_iter % params.epoch_size == 0:
            logger.info("============ Starting epoch %i ... ============" % epoch)

        # single optimization step
        model.train()
        opt.zero_grad()
        train_loss = calculate_loss(model, train_batch, params)
        train_loss.backward()
        if params.clip_grad_norm > 0:
            clip_grad_norm_(model.parameters(), params.clip_grad_norm)
        opt.step()
        if total_iter % params.print_after == 0:
            logger.info("Step {} ; Loss = {}".format(total_iter, train_loss))

        if total_iter > 0 and total_iter % params.epoch_size == (params.epoch_size - 1):
            # run eval step (calculate validation loss)
            model.eval()
            n_chars = 0
            xe_loss = 0
            n_valid = 0
            logger.info("============ Evaluating ... ============")
            val_loader = val_data.get_iterator(shuffle=True)
            for val_iter, val_batch in enumerate(val_loader):
                with torch.no_grad():
                    val_scores, val_loss, val_y = calculate_loss(model, val_batch, params, get_scores=True)
                # update stats
                n_chars += val_y.size(0)
                xe_loss += val_loss.item() * len(val_y)
                n_valid += (val_scores.max(1)[1] == val_y).sum().item()
            ppl = np.exp(xe_loss / n_chars)
            acc = 100. * n_valid / n_chars
            logger.info("Acc={}, PPL={}".format(acc, ppl))
            if acc > scores['acc']:
                scores['acc'] = acc
                scores['ppl'] = ppl
                save_model(params, data, model, opt, dico, logger, 'best_model', epoch, total_iter, scores)
                logger.info('Saving new best_model {}'.format(epoch))
            logger.info("Best Acc={}, PPL={}".format(scores['acc'], scores['ppl']))

            # sample from the model and score the samples with guacamol
            # benchmarks; a score of -1 marks a failed benchmark run
            logger.info("============ Generating ... ============")
            number_samples = 100
            gen_smiles = generate_smiles(params, model, dico, number_samples)
            generator = ARMockGenerator(gen_smiles)
            try:
                benchmark = ValidityBenchmark(number_samples=number_samples)
                validity_score = benchmark.assess_model(generator).score
            except Exception:
                validity_score = -1
            try:
                benchmark = UniquenessBenchmark(number_samples=number_samples)
                uniqueness_score = benchmark.assess_model(generator).score
            except Exception:
                uniqueness_score = -1
            try:
                benchmark = KLDivBenchmark(number_samples=number_samples, training_set=all_smiles_mols)
                kldiv_score = benchmark.assess_model(generator).score
            except Exception:
                kldiv_score = -1
            logger.info('Validity Score={}, Uniqueness Score={}, KlDiv Score={}'.format(
                validity_score, uniqueness_score, kldiv_score))
            save_model(params, data, model, opt, dico, logger, 'model', epoch, total_iter,
                       {'ppl': ppl, 'acc': acc})
from src.bot.messages import send_future_match, send_result_match, send_closest_match
from src.extensions import session
from src.logger import create_logger
from src.models import Series
from src.models import Teams
from src.models import Users
from src.parser.dota_series import DotaParser

logger = create_logger()


def send_future_matches_to_users():
    """Notify every user about today's matches involving one of their teams."""
    users = session.query(Users).all()
    for user in users:
        user_teams = user.get_user_teams()
        # NOTE(review): today's matches are re-queried for every user;
        # hoisting this out of the loop would avoid repeated queries — confirm intent
        matches = Series.get_today_matches()
        for match in matches:
            if match.team1_name in user_teams or match.team2_name in user_teams:
                send_future_match(user.id, match)


def send_updated_matches_to_user(matches: list):
    """Re-announce the given (updated) matches to each affected user.

    NOTE(review): `get_users_to_updates` is not defined in this view —
    presumably a sibling helper in this module; verify it exists.
    """
    users_for_update = get_users_to_updates(matches)
    for user in users_for_update:
        user_teams = user.get_user_teams()
        for match in matches:
            if match.team1_name in user_teams or match.team2_name in user_teams:
                send_future_match(user.id, match)


# NOTE(review): chunk is truncated here — the body of this function is
# outside the visible source.
def send_closest_matches_to_user():
from src.read_data import read_data, K_Fold_Spliter, Random_Spliter, Specific_Spliter
from pathlib import Path
from src.logger import create_logger

# task names the data loader knows about
tasks = [
    "wiki", "monomt", "tsfmt", "tsfparsing", "tsfpos", "tsfel", "bli", "ma", "ud"
]
# repository root, relative to this test file
data_folder = Path(__file__).parent / ".."
logger = create_logger("pytest.log", vb=1)


def test_load_data():
    """Smoke-test read_data on the 'monomt' task: row counts and language columns."""
    logger.info("*" * 20)
    data = read_data(task="monomt",
                     folder=data_folder,
                     shuffle=True,
                     selected_feats=None,
                     combine_models=False)
    # the monomt/BLEU split is expected to contain exactly 54 examples
    assert len(data["BLEU"]["feats"]) == 54
    assert len(data["BLEU"]["labels"]) == 54
    assert len(data["BLEU"]["langs"]) == 54
    # langs is presumably a pandas DataFrame (has .columns) — TODO confirm
    assert list(data["BLEU"]["langs"].columns.values) == [
        "Source Language", "Target Language"
    ]
    # test_feature_selection
    logger.info("*" * 20)
    # NOTE(review): chunk is truncated mid-call below — the remaining
    # arguments of this read_data(...) are outside the visible source.
    data = read_data(task="monomt", folder=data_folder,
from flask import Flask, request, Response
from nltk.tokenize import sent_tokenize
import json
from translate import translate_onesentence
from store import storeonesent2mysql, update_table_viamysql, update_mysql_viafile
from src.logger import create_logger
import datetime

logger = create_logger('access.log')
app = Flask(__name__)

# bilingual dictionary
# provided to update and translate function
TAGTABLE = {}
# populate TAGTABLE from MySQL ('pe' -> 'zh'); returns the number of pairs loaded
number = update_table_viamysql(TAGTABLE, 'pe', 'zh')
logger.info(f'loading {number} pairs from table')


@app.route('/api/split', methods=['POST'])
def split_sentence():
    """Split posted text into sentences; rejects requests once the trial expires."""
    if request.method == 'POST':
        now = datetime.datetime.now()
        year, month, day = now.year, now.month, now.day
        # months remaining in a 13-month trial window starting 2020-03
        left_month = 13 - ((year - 2020) * 12 + (month - 3))
        if left_month < 0:
            # trial expired: return error payload (user-facing Chinese message)
            result = {}
            result['errorCode'] = 1
            result['error'] = '试用已经过期,请联系管理员。'
            return Response(json.dumps(result), mimetype='application/javascript')
        # NOTE(review): chunk is truncated here — the success branch body
        # is outside the visible source.
        else: