def set_value(shop_id: int, name: str, value, path="/"):
    value_type = "str"
    if isinstance(value, bool):
        value_type = "bool"
    elif isinstance(value, float):
        value_type = "float"
    elif isinstance(value, int):
        value_type = "int"
    conf = (Configuration.query
            .filter(Configuration.shop_id == shop_id)
            .filter(Configuration.name == name)
            .filter(Configuration.path == path)
            .first())
    if conf:
        if Configuration.get_typed_value(conf.value, conf.value_type) != value:
            conf.value = str(value)
            conf.value_type = value_type
            conf.save()
    else:
        Configuration(
            shop_id=shop_id,
            name=name,
            value=str(value),
            value_type=value_type,
            path=path,
        ).save()
def set_common_value(name: str, value):
    value_type = "str"
    if isinstance(value, bool):
        value_type = "bool"
    elif isinstance(value, float):
        value_type = "float"
    elif isinstance(value, int):
        value_type = "int"
    elif isinstance(value, datetime):
        value_type = "datetime"
        value = value.isoformat()
    conf = (Configuration.query
            .filter(Configuration.shop_id == None)  # noqa: E711
            .filter(Configuration.name == name)
            .first())
    if conf:
        if Configuration.get_typed_value(conf.value, conf.value_type) != value:
            conf.value = str(value)
            conf.value_type = value_type
            conf.save()
    else:
        Configuration(
            name=name,
            value=str(value),
            value_type=value_type,
        ).save()
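# Illustrative usage of the two setters above (hypothetical names/values; assumes
# the Configuration model is bound to an active SQLAlchemy session):
#
#   set_value(shop_id=1, name="items_per_page", value=25)   # stored as "25", value_type "int"
#   set_common_value("maintenance_mode", True)               # shop-independent, stored as "True"/"bool"
#   set_common_value("last_sync", datetime.utcnow())         # stored as an ISO-8601 string, value_type "datetime"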
def create_result_dir(config: BaseConfig) -> str:
    """Creates a result directory and saves the current config in this directory.

    Args:
        config: A configuration containing `base_result_dir` and `unique_name`.
            The new directory is created at `base_result_dir/unique_name`.

    Returns:
        Path to the created result directory.
    """
    result_dir = os.path.join(config.base_result_dir, config.unique_name)
    os.makedirs(result_dir)
    config.to_yaml(os.path.join(result_dir, "config.yaml"))
    return result_dir
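# Minimal usage sketch (hypothetical paths; assumes a BaseConfig instance that
# exposes `base_result_dir`, `unique_name`, and `to_yaml` as referenced above):
#
#   config = BaseConfig(base_result_dir="/tmp/results", unique_name="run_001")
#   result_dir = create_result_dir(config)   # -> "/tmp/results/run_001", with config.yaml inside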
def insert_abdu(login_list):
    """Insert multiple faculty rows into the faculty table."""
    sql = ("INSERT INTO faculty(netid, firstname, lastname, email, dept, term, course_number) "
           "VALUES(%s, %s, %s, %s, %s, %s, %s)")
    conn = None
    try:
        params = BaseConfig()
        host_ = params.DB_SERVICE
        port_ = params.DB_PORT
        database_ = params.DB_NAME
        user_ = params.DB_USER
        password_ = params.DB_PASS
        conn = psycopg2.connect(host=host_, database=database_, user=user_,
                                password=password_, port=port_)
        cur = conn.cursor()
        cur.executemany(sql, login_list)
        conn.commit()
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
def insert_timestamp_test(ts_list):
    """Insert multiple rows into the timestamptest table."""
    sql = "INSERT INTO timestamptest VALUES(%s, %s, NULL, NULL)"
    conn = None
    try:
        params = BaseConfig()
        host_ = params.DB_SERVICE
        port_ = params.DB_PORT
        database_ = params.DB_NAME
        user_ = params.DB_USER
        password_ = params.DB_PASS
        conn = psycopg2.connect(host=host_, database=database_, user=user_,
                                password=password_, port=port_)
        cur = conn.cursor()
        cur.executemany(sql, ts_list)
        conn.commit()
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
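# Illustrative calls (hypothetical data; each tuple must match the column order
# of the INSERT statements above):
#
#   insert_abdu([
#       ("jd123", "Jane", "Doe", "jd123@example.edu", "ECE", "FA21", "ECE590"),
#   ])
#   insert_timestamp_test([(1, 1)])   # the remaining two columns are filled with NULL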
def gunicorn_config() -> AnyStr:
    """Generate the gunicorn configuration file for the project."""
    project_config = BaseConfig().config
    project_path = project_config['PROJECT_PATH']
    gunicorn_config_path = os.path.join(project_path, 'config/gunicorn.py')
    cpu_total = cpu_count()
    if cpu_total > 8:
        worker_cpu = 9
    elif cpu_total <= 2:
        worker_cpu = 3
    else:
        worker_cpu = cpu_total * 2 + 1
    backend_node = project_config['BACKEND_NODE']
    port = backend_node.split(':')[-1]
    jinja_config = {
        'host': f"0.0.0.0:{port}",
        'worker': worker_cpu,
        'loglevel': project_config['LOG_LEVEL']
    }
    insert_jinja_template(project_path=project_path,
                          out_path=gunicorn_config_path,
                          template_name='gunicorn.jinja',
                          jinja_config=jinja_config)
    return gunicorn_config_path
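# Worker-count rule used above, spelled out (illustrative):
#   cpu_count() <= 2       -> 3 workers
#   2 < cpu_count() <= 8   -> 2 * cpu_count() + 1 workers (e.g. 4 CPUs -> 9)
#   cpu_count() > 8        -> capped at 9 workers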
def __init__(self):
    skeleton = {
        'network': {'address': str, 'port': int, 'ssl': bool},
        'channels': list,
        'user': {'nickname': str, 'realname': str, 'password': str,
                 'opername': str, 'operpass': str},
        'auth': {'oper': bool, 'nicksrv': bool, 'opersrv': bool},
        'modes': list,
    }
    BaseConfig.__init__(self, 'config.yaml', skeleton, validate_config)
def __init__(self):
    super(Tweet, self).__init__()
    self.config = BaseConfig()
    self.config.checkAuthSettings()
    self.api = twitter.Api(self.config.TWT_CONSUMER_KEY,
                           self.config.TWT_CONSUMER_SECRET,
                           self.config.TWT_ACCESS_TOKEN,
                           self.config.TWT_ACCESS_SECRET)
def __init__(self):
    super(Standings, self).__init__()
    self.config = BaseConfig()
    # API Data
    self.YEAR = date.today().year
    self.API_URL = ('http://api.sportradar.us/mlb-t5/seasontd/' +
                    str(self.YEAR) + '/REG/standings.xml?api_key=' +
                    self.config.API_KEY)
    self.XML_NS = 'http://feed.elasticstats.com/schema/baseball/v5/mlb/standings.xsd'
def parse_config(config): base_config = BaseConfig(config["base_config"]) qiniu_config = S3Config(config["qiniu_config"]) sql_config = [SQLConfig(x) for x in config["sql_config"] ] if "sql_config" in config else None redis_config = RedisConfig( config["redis_config"]) if "redis_config" in config else None mongo_config = [MongoConfig(x) for x in config['mongo_config'] ] if "mongo_config" in config else None return base_config, sql_config, redis_config, qiniu_config, mongo_config
def backend():
    from manage import app

    base_config = BaseConfig().config
    _port = int(base_config['BACKEND_NODE'].split(':')[-1])
    log_level = base_config['LOG_LEVEL']
    debug = log_level == 'debug'
    app.run(host='0.0.0.0', port=_port, debug=debug)
def test_get_bd_uri_function(self):
    expected_result = ("db_type_example://db_user_example:db_password_example"
                       "@db_endpoint_example/db_name_example")
    actual_result = BaseConfig.build_db_uri(
        db_type_arg="db_type_example",
        db_user_arg="db_user_example",
        db_password_arg="db_password_example",
        db_endpoint_arg="db_endpoint_example",
        db_name_arg="db_name_example",
    )
    self.assertEqual(expected_result, actual_result)
def data_loaders(shuffle=False):
    config = BaseConfig()
    batch_size = config.batch_size
    transform = transforms.Compose([
        transforms.Scale((config.height, config.width)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    tr_csv_path, val_csv_path, te_csv_path, feature_name = (
        config.tr_csv_path, config.val_csv_path, config.te_csv_path, config.feature_name)
    encoder = train_encoder(tr_csv_path, val_csv_path, te_csv_path, feature_name)
    tr_dataset = LabelDataset(image_folder=config.img_folder, transform=transform,
                              le=encoder, csv_path=tr_csv_path, feature_name=feature_name)
    val_dataset = LabelDataset(image_folder=config.img_folder, transform=transform,
                               le=encoder, csv_path=val_csv_path, feature_name=feature_name)
    te_dataset = LabelDataset(image_folder=config.img_folder, transform=transform,
                              le=encoder, csv_path=te_csv_path, feature_name=feature_name)
    kwargs = {
        'num_workers': 16,
        'pin_memory': True
    } if torch.cuda.is_available() else {}
    tr_data_loader = DataLoader(tr_dataset, shuffle=shuffle, batch_size=batch_size, **kwargs)
    val_data_loader = DataLoader(val_dataset, shuffle=shuffle, batch_size=batch_size, **kwargs)
    te_data_loader = DataLoader(te_dataset, shuffle=shuffle, batch_size=batch_size, **kwargs)
    return tr_data_loader, val_data_loader, te_data_loader
def timer_tasks():
    import asyncio

    import uvloop
    from mode import Worker

    from app.services.tasks_scheduler.timer_tasks.app.base import app

    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    loop = asyncio.get_event_loop()
    base_config = BaseConfig().config
    log_level = base_config['LOG_LEVEL']
    worker = Worker(app, loglevel=log_level, loop=loop)
    worker.execute_from_commandline()
def async_tasks():
    import uvicorn

    from app.services.tasks_scheduler.async_tasks.app import app

    base_config = BaseConfig().config
    _port = int(base_config['ASYNC_TASKS_NODE'].split(':')[-1])
    log_level = base_config['LOG_LEVEL']
    uvicorn.run(app, host='0.0.0.0', port=_port, loop='uvloop', log_level=log_level)
def run():
    device = 0 if torch.cuda.is_available() else -1
    config = BaseConfig()
    logging.info('%s_cross_entropy/ckpt.pth.tar' % config.result_dir)
    if os.path.exists('%s_cross_entropy/ckpt.pth.tar' % config.result_dir):
        return True
    logging.info("Triplet Trainer Not Return")
    create_dirs()
    tr_data_loader, val_data_loader, te_data_loader = loaders.data_loaders(shuffle=True)
    model = getattr(models, config.network)(num_classes=len(tr_data_loader.dataset.y))
    criterion = CrossEntropyLoss()
    if device == 0:
        model.cuda()
        criterion.cuda()
    trainer = ModuleTrainer(model)
    epochs = config.epochs
    callbacks = [
        EarlyStopping(monitor='val_acc', patience=20),
        ModelCheckpoint('%s_cross_entropy' % config.result_dir, save_best_only=True, verbose=1),
        CSVLogger("%s_cross_entropy/logger.csv" % config.result_dir)
    ]
    metrics = [CategoricalAccuracy()]
    trainer.compile(loss=criterion, optimizer='adam', metrics=metrics)
    trainer.set_callbacks(callbacks)
    trainer.fit_loader(tr_data_loader, val_loader=val_data_loader,
                       num_epoch=epochs, verbose=2, cuda_device=device)
    tr_loss = trainer.evaluate_loader(tr_data_loader, cuda_device=device)
    logging.info(tr_loss)
    val_loss = trainer.evaluate_loader(val_data_loader, cuda_device=device)
    logging.info(val_loss)
    te_loss = trainer.evaluate_loader(te_data_loader, cuda_device=device)
    logging.info(te_loss)
    with open('%s_cross_entropy' % config.log_path, "a") as f:
        f.write('Train: %s\nVal: %s\nTest: %s\n' % (str(tr_loss), str(val_loss), str(te_loss)))
def __init__(self):
    super(Schedule, self).__init__()
    self.TODAY = date.today()
    self.MONTH = datetime.now().strftime("%m")
    self.YEAR = self.TODAY.year
    self.TZ = {'CT': pytz.timezone('US/Central')}
    # API Configuration
    self.config = BaseConfig()
    self.API_URL = ('http://api.sportradar.us/mlb-t5/games/' + str(self.YEAR) +
                    '/REG/schedule.xml?api_key=' + self.config.API_KEY)
    self.XML_NS = '{http://feed.elasticstats.com/schema/baseball/v5/schedule.xsd}'
    self.XML_FILE = urllib2.urlopen(self.API_URL)
    self.CWD = os.path.dirname(os.path.realpath(__file__))
    self.TREE = ET.parse(self.XML_FILE)
    self.ROOT = self.TREE.getroot()
def create_app(test_config=False):
    app = Flask(__name__)
    config = TestConfig() if test_config else BaseConfig()
    app.config.from_object(config)
    app.register_blueprint(api_blueprint)
    jwt = JWTManager()
    jwt.init_app(app)
    db.init_app(app)
    migrate = Migrate(app, db)
    logging.basicConfig(
        format='%(asctime)s - %(name)s:%(message)s',
        filename=Path(__file__, '../../app.log').resolve(),
        level=logging.DEBUG,
    )
    return app
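# Usage sketch of the application factory above (hypothetical endpoint path):
#
#   app = create_app()                          # BaseConfig for normal runs
#   test_app = create_app(test_config=True)     # TestConfig, e.g. for pytest fixtures
#   test_app.test_client().get('/...')          # issue requests without a running server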
def upload_file():
    if request.method == 'POST':
        # Check that the POST request actually contains a file part
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        if file.filename == '':
            flash('No file selected for uploading')
            return redirect(request.url)
        if file and BaseConfig.allowed_file(file.filename):
            # filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], file.filename))
            classifier = Cifar10AudioClassifier()
            classifier.load_model(model_dir_path='./deeplearning/demo/models')
            predicted_label_id = classifier.predict_class(
                './deeplearning/demo/data/audio_samples/' + file.filename)
            predicted_label = afrago_labels[predicted_label_id]
            print('predicted: ', predicted_label)
            flash(predicted_label)
            return redirect('/upload')
def supervisord_config(run_services: List = None) -> None:
    """run_services: ['backend', 'streams_engine', 'tasks_scheduler']"""
    project_config = BaseConfig().config
    project_config['USERNAME'] = getpass.getuser()
    project_path = project_config['PROJECT_PATH']
    venv_path = _get_virtualenv_path()
    program_group = []
    services_config = []
    service_func_dict = {
        'backend': _backend_config,
        'tasks_scheduler': _tasks_scheduler_config
    }
    if not run_services:
        run_services = ['backend', 'tasks_scheduler']
    for run_service in run_services:
        service_func = service_func_dict[run_service]
        service_config, group_name = service_func(venv_path, project_config)
        program_group.extend(group_name)
        services_config.extend(service_config)
    supervisor_path = os.path.join(project_path, 'config/actorcloud_supervisord.conf')
    jinja_config = {
        'group_programs': ','.join(program_group),
        'services': services_config
    }
    insert_jinja_template(project_path=project_path,
                          out_path=supervisor_path,
                          template_name='supervisor.jinja',
                          jinja_config=jinja_config)
    info = "Generated supervisor config successfully!"
    print(info)
from fastapi import FastAPI

from config import BaseConfig

config = BaseConfig()
app = FastAPI(title='TeamChat clone')

# Route and socket modules are imported after `app` is created
# (a common pattern to avoid circular imports).
from . import routes
from . import sockets

app.mount('/ws', sockets.sio_app)
def online_triplet_loaders():
    config = BaseConfig()
    batch_size = config.batch_size
    transform = transforms.Compose([
        transforms.Scale((config.height, config.width)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    tr_csv_path, val_csv_path, te_csv_path, feature_name = (
        config.tr_csv_path, config.val_csv_path, config.te_csv_path, config.feature_name)
    encoder = train_encoder(tr_csv_path, val_csv_path, te_csv_path, feature_name)
    tr_dataset = LabelDataset(image_folder=config.img_folder, transform=transform,
                              le=encoder, csv_path=tr_csv_path, feature_name=feature_name)
    val_dataset = LabelDataset(image_folder=config.img_folder, transform=transform,
                               le=encoder, csv_path=val_csv_path, feature_name=feature_name)
    te_dataset = LabelDataset(image_folder=config.img_folder, transform=transform,
                              le=encoder, csv_path=te_csv_path, feature_name=feature_name)
    n_classes = 8
    n_tr_classes = len(set(tr_dataset.y))
    if n_tr_classes < n_classes:
        n_classes = n_tr_classes
    n_samples = int(batch_size / n_classes)
    train_batch_sampler = BalancedBatchSampler(tr_dataset.X, tr_dataset.y,
                                               n_classes=n_classes, n_samples=n_samples)
    val_batch_sampler = BalancedBatchSampler(val_dataset.X, val_dataset.y,
                                             n_classes=n_classes, n_samples=n_samples)
    test_batch_sampler = BalancedBatchSampler(te_dataset.X, te_dataset.y,
                                              n_classes=n_classes, n_samples=n_samples)
    kwargs = {
        'num_workers': 16,
        'pin_memory': True
    } if torch.cuda.is_available() else {}
    tr_data_loader = DataLoader(tr_dataset, batch_sampler=train_batch_sampler, **kwargs)
    val_data_loader = DataLoader(val_dataset, batch_sampler=val_batch_sampler, **kwargs)
    te_data_loader = DataLoader(te_dataset, batch_sampler=test_batch_sampler, **kwargs)
    return tr_data_loader, val_data_loader, te_data_loader
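# Batch-composition arithmetic used above (illustrative): with batch_size=64 and the
# default n_classes=8, each balanced batch draws n_samples = 64 // 8 = 8 items per
# class; if the training set has fewer than 8 distinct labels, n_classes shrinks to
# match so every batch still covers all available classes.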
def create_tables():
    """Create tables in the PostgreSQL database."""
    commands = (
        """
        CREATE TABLE courses (
            course_number VARCHAR(255),
            term VARCHAR(255),
            title VARCHAR(255),
            instructor VARCHAR(255),
            instructor_netid VARCHAR(255),
            piazza_nid VARCHAR(255) DEFAULT NULL,
            piazza_netid VARCHAR(255) DEFAULT NULL,
            piazza_passwd VARCHAR(255) DEFAULT NULL,
            readts INTEGER DEFAULT 0,
            writets INTEGER DEFAULT 0,
            PRIMARY KEY(course_number, term)
        )
        """,
        """
        CREATE TABLE faculty (
            netid VARCHAR(255),
            firstname VARCHAR(255) NOT NULL,
            lastname VARCHAR(255) NOT NULL,
            email VARCHAR(255),
            dept VARCHAR(255) DEFAULT 'ECE',
            office_number INTEGER,
            term VARCHAR(255),
            course_number VARCHAR(255),
            readts INTEGER DEFAULT 0,
            writets INTEGER DEFAULT 0,
            PRIMARY KEY(netid, term, course_number),
            FOREIGN KEY (course_number, term)
                REFERENCES courses (course_number, term)
                ON DELETE CASCADE
            -- FOREIGN KEY(netid)
            --     REFERENCES login_details(netid)
            --     ON UPDATE CASCADE
            --     ON DELETE CASCADE
        )
        """,
        """
        CREATE TABLE students (
            netid VARCHAR(255) PRIMARY KEY,
            firstname VARCHAR(255) NOT NULL,
            lastname VARCHAR(255) NOT NULL,
            email VARCHAR(255),
            dept VARCHAR(255) DEFAULT 'ECE',
            year VARCHAR(255) DEFAULT 'Grad',
            readts INTEGER DEFAULT 0,
            writets INTEGER DEFAULT 0
            -- FOREIGN KEY(netid)
            --     REFERENCES login_details(netid)
            --     ON UPDATE CASCADE
            --     ON DELETE CASCADE
        )
        """
        # Create more tables below as necessary, following the relational schema.
    )
    conn = None
    try:
        # DB_USER, DB_PASS, DB_SERVICE, DB_PORT, DB_NAME
        params = BaseConfig()
        host_ = params.DB_SERVICE
        port_ = params.DB_PORT
        database_ = params.DB_NAME
        user_ = params.DB_USER
        password_ = params.DB_PASS
        conn = psycopg2.connect(host=host_, database=database_, user=user_,
                                password=password_, port=port_)
        cur = conn.cursor()
        for command in commands:
            cur.execute(command)
        cur.close()
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
def create_concurrency_triggers():  # may still need to write an insert trigger
    """Create concurrency triggers for all SQL tables."""
    commands = (
        """
        CREATE OR REPLACE FUNCTION trigger_update_timestamp()
        RETURNS trigger AS $$
        BEGIN
            RAISE NOTICE 'Checking ability to read/write';
            IF NEW.writets IS NOT NULL THEN
                RAISE NOTICE 'Checking for ability to write';
                IF (OLD.writets IS NOT NULL AND OLD.writets > NEW.writets)
                   OR (OLD.readts IS NOT NULL AND OLD.readts > NEW.writets) THEN
                    RAISE EXCEPTION 'UPDATE concurrency: row has been read or written to by a more recent transaction';
                ELSE
                    NEW.readts = OLD.readts;
                END IF;
            ELSEIF NEW.readts IS NOT NULL THEN
                RAISE NOTICE 'Checking for ability to read';
                IF OLD.writets IS NOT NULL AND OLD.writets > NEW.readts THEN
                    RAISE EXCEPTION 'READ concurrency: row has been written to by a more recent transaction';
                ELSE
                    IF OLD.readts IS NOT NULL AND OLD.readts > NEW.readts THEN
                        NEW.readts = OLD.readts;
                    END IF;
                    NEW.writets = OLD.writets;
                END IF;
            END IF;
            RETURN NEW;
        END;
        $$ LANGUAGE plpgsql;
        """,
        """
        CREATE TRIGGER update_timestamp BEFORE UPDATE ON timestamptest
        FOR EACH ROW EXECUTE PROCEDURE trigger_update_timestamp();
        """,
        """
        CREATE TRIGGER update_timestamp1 BEFORE UPDATE ON courses
        FOR EACH ROW EXECUTE PROCEDURE trigger_update_timestamp();
        """,
        """
        CREATE TRIGGER update_timestamp2 BEFORE UPDATE ON faculty
        FOR EACH ROW EXECUTE PROCEDURE trigger_update_timestamp();
        """,
        """
        CREATE TRIGGER update_timestamp3 BEFORE UPDATE ON students
        FOR EACH ROW EXECUTE PROCEDURE trigger_update_timestamp();
        """,
        """
        CREATE TRIGGER update_timestamp4 BEFORE UPDATE ON session
        FOR EACH ROW EXECUTE PROCEDURE trigger_update_timestamp();
        """,
        """
        CREATE TRIGGER update_timestamp5 BEFORE UPDATE ON student_question
        FOR EACH ROW EXECUTE PROCEDURE trigger_update_timestamp();
        """,
        """
        CREATE TRIGGER update_timestamp6 BEFORE UPDATE ON upvotes
        FOR EACH ROW EXECUTE PROCEDURE trigger_update_timestamp();
        """,
        """
        CREATE TRIGGER update_timestamp7 BEFORE UPDATE ON iclickerresponse
        FOR EACH ROW EXECUTE PROCEDURE trigger_update_timestamp();
        """,
        """
        CREATE TRIGGER update_timestamp8 BEFORE UPDATE ON enrollment
        FOR EACH ROW EXECUTE PROCEDURE trigger_update_timestamp();
        """,
        # NOTE: update_timestamp9 duplicates update_timestamp7 (both target iclickerresponse).
        """
        CREATE TRIGGER update_timestamp9 BEFORE UPDATE ON iclickerresponse
        FOR EACH ROW EXECUTE PROCEDURE trigger_update_timestamp();
        """
    )
    conn = None
    try:
        params = BaseConfig()
        host_ = params.DB_SERVICE
        port_ = params.DB_PORT
        database_ = params.DB_NAME
        user_ = params.DB_USER
        password_ = params.DB_PASS
        conn = psycopg2.connect(host=host_, database=database_, user=user_,
                                password=password_, port=port_)
        cur = conn.cursor()
        for command in commands:
            cur.execute(command)
        cur.close()
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
def parse_config(config): base_config = BaseConfig(config["base_config"]) lark_config = LarkConfig(config["lark_config"]) sql_config = [ SQLConfig(x) for x in config["sql_config"] ] if "sql_config" in config else None return base_config, sql_config, lark_config
def filterRegions(self, region_list, percentage=InstagramConfig.region_percentage,
                  test=False, n=10, m=10, element_type='photos'):
    assert element_type in ['photos', 'tweets']

    if test:
        # n and m must be set when test is True; this branch is only for tests.
        new_region_list = []
        # folder = '/res/users/kx19/Citybeat/CityBeat/distributed_gp/utility/region_cache/'
        # grand : res ; joust : grad
        folder = BaseConfig.getRegionListPath()
        file_name = element_type + '_' + str(n) + '_' + str(m) + '.txt'
        fid = open(folder + file_name)
        for line in fid:
            region = line.split()
            for i in xrange(0, 4):
                region[i] = float(region[i])
            region = Region(region)
            new_region_list.append(region)
        return new_region_list

    # This method should not be a member of this class.
    # TODO: change the period to one week
    # end_time = 1359704845
    # begin_time = 1299704845
    end_time = 1962096000
    begin_time = 1362096000
    if element_type == 'photos':
        di = PhotoInterface()
    else:
        di = TweetInterface()
    document_cur = di.rangeQuery(period=[str(begin_time), str(end_time)])

    region_number = len(region_list)
    number_document_in_region = [0] * region_number
    bad_documents = 0
    total_documents = 0
    for document in document_cur:
        total_documents += 1
        lat = float(document['location']['latitude'])
        lng = float(document['location']['longitude'])
        flag = 0
        for i in xrange(region_number):
            if region_list[i].insideRegion([lat, lng]):
                number_document_in_region[i] += 1
                flag = 1
                break
        if flag == 0:
            bad_documents += 1
    print str(bad_documents) + ' out of ' + str(total_documents) + ' documents are bad (not in NY)'

    region_tuples = []
    for i in xrange(0, region_number):
        region_tuples.append((region_list[i], number_document_in_region[i]))
    region_tuples.sort(key=operator.itemgetter(1), reverse=True)

    valid_region_number = int(0.5 + 1.0 * region_number * percentage)
    valid_regions = []
    # print region_tuples[valid_region_number - 1][1]
    for i in xrange(0, valid_region_number):
        # NOTE: region/lat/lng/cnt computed here are never used (dead code).
        region = region_tuples[i][0]
        lat = (self._region['min_lat'] + self._region['max_lat']) / 2
        lng = (self._region['min_lng'] + self._region['max_lng']) / 2
        cnt = region_tuples[i][1]
    for i in xrange(0, valid_region_number):
        valid_regions.append(region_tuples[i][0])
    return valid_regions
import asyncio

import uvicorn
import uvloop

from app.services.tasks_scheduler.async_tasks.app import app
from config import BaseConfig

asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

if __name__ == '__main__':
    base_config = BaseConfig().config
    _port = int(base_config['ASYNC_TASKS_NODE'].split(':')[-1])
    log_level = base_config['LOG_LEVEL']
    uvicorn.run(app, host='0.0.0.0', port=_port, loop='uvloop', log_level=log_level)
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with py-ispyb. If not, see <http://www.gnu.org/licenses/>.

import os
import sys
import csv

import MySQLdb

ispyb_root = os.path.dirname(os.path.abspath(__file__)).split(os.sep)
ispyb_root = "/" + os.path.join(*ispyb_root[1:-1])
sys.path.insert(0, ispyb_root)

from config import BaseConfig

config = BaseConfig(os.path.join(ispyb_root, "ispyb_core_config.yml"))
uri = config.SQLALCHEMY_DATABASE_URI

# Example URI layout: mysql://ispyb_api:password_1234@localhost/ispyb_test
user = uri.split("//")[1].split(":")[0]
passwd = uri.split("//")[1].split(":")[1].split("@")[0]
host = uri.split("@")[1].split("/")[0]
db_name = uri.split("/")[-1]

gen_tables = []
gen_modules = []

with open("%s/scripts/core_db_mapping.csv" % ispyb_root) as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        gen_modules.append(row[0])
def get_default_device_count():
    """Get the number of devices that a tenant can manage."""
    base_config = BaseConfig()
    default_devices_limit = base_config.config['DEFAULT_DEVICES_LIMIT']
    return str(default_devices_limit)
parser = argparse.ArgumentParser(description='Chinese Text Classification')
parser.add_argument('--model', default='bert', type=str,
                    help='choose a model in bert, ernie, roberta')
parser.add_argument('--cuda', action='store_true',
                    help='use GPU if set, otherwise CPU')
args = parser.parse_args()

if __name__ == '__main__':
    model_name = args.model
    base_config = BaseConfig(args.cuda, model_name)
    model_config = BertConfig.from_pretrained(
        base_config.pretrained_path + '/config.json',
        num_labels=base_config.label_number)
    tokenizer = BertTokenizer.from_pretrained(base_config.pretrained_path,
                                              do_lower_case=True)
    model = BertForSequenceClassification.from_pretrained(
        base_config.pretrained_path, config=model_config)
    model.to(base_config.device)
    np.random.seed(42)
    torch.manual_seed(42)
    start_time = time.time()
    print("Loading data...")
    train_dataset = load_and_cache_examples(base_config,
def main(_):
    # tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.set_verbosity(tf.logging.DEBUG)
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_id
    processor = SenpairProcessor()
    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict` must be True.")
    config = BaseConfig.from_json_file(FLAGS.config_file)
    tf.gfile.MakeDirs(FLAGS.output_dir)
    tokenizer = tokenization.Tokenizer(vocab_file=FLAGS.vocab_file,
                                       stop_words_file=FLAGS.stop_words_file,
                                       use_pos=False)
    run_config = None
    num_train_steps = 0
    num_warmup_steps = 0
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.input_file)
        num_train_steps = int(
            len(train_examples) / FLAGS.batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = FLAGS.num_warmup_steps
        run_config = tf.estimator.RunConfig(
            save_summary_steps=100,
            save_checkpoints_steps=num_train_steps / FLAGS.num_train_epochs,
            keep_checkpoint_max=5,
        )
    embedding_table = None
    if FLAGS.embedding_table is not None:
        embedding_table = load_embedding_table(FLAGS.embedding_table, FLAGS.vocab_file)
    model_fn = model_fn_builder(config=config,
                                learning_rate=FLAGS.learning_rate,
                                task=FLAGS.task_type,
                                single_text=FLAGS.single_text,
                                init_checkpoint=FLAGS.init_checkpoint,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                embedding_table_value=embedding_table,
                                embedding_table_trainable=False,
                                model_name=FLAGS.model_name)
    params = {"batch_size": FLAGS.batch_size}
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       model_dir=FLAGS.output_dir,
                                       config=run_config,
                                       params=params)
    if FLAGS.do_train:
        if FLAGS.cached_tfrecord:
            train_file = FLAGS.cached_tfrecord
        else:
            train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        if not os.path.exists(train_file):
            file_based_convert_examples_to_features(train_examples,
                                                    FLAGS.max_seq_length,
                                                    tokenizer,
                                                    train_file,
                                                    do_token=FLAGS.do_token)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        del train_examples  # free the memory held by train_examples
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
        )
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    elif FLAGS.do_eval:
        dev_examples = processor.get_train_examples(FLAGS.input_file)
        if FLAGS.cached_tfrecord:
            dev_file = FLAGS.cached_tfrecord
        else:
            dev_file = os.path.join(FLAGS.output_dir, "dev.tf_record")
        if not os.path.exists(dev_file):
            file_based_convert_examples_to_features(dev_examples,
                                                    FLAGS.max_seq_length,
                                                    tokenizer,
                                                    dev_file)
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d", len(dev_examples))
        tf.logging.info("  Batch size = %d", FLAGS.batch_size)
        del dev_examples
        eval_input_fn = file_based_input_fn_builder(
            input_file=dev_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False)
        if FLAGS.eval_model is not None:
            eval_model_path = os.path.join(FLAGS.output_dir, FLAGS.eval_model)
        else:
            eval_model_path = None
        result = estimator.evaluate(input_fn=eval_input_fn,
                                    checkpoint_path=eval_model_path)
        eval_output_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(eval_output_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
    else:
        predict_examples = processor.get_test_examples(FLAGS.input_file)
        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples,
                                                FLAGS.max_seq_length,
                                                tokenizer,
                                                predict_file,
                                                set_type="test",
                                                label_type="int",
                                                single_text=FLAGS.single_text)
        tf.logging.info("***** Running prediction *****")
        tf.logging.info("  Num examples = %d", len(predict_examples))
        tf.logging.info("  Batch size = %d", FLAGS.batch_size)
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            single_text=FLAGS.single_text)
        if FLAGS.pred_model is not None:
            pred_model_path = os.path.join(FLAGS.output_dir, FLAGS.pred_model)
        else:
            pred_model_path = None
        result = estimator.predict(input_fn=predict_input_fn,
                                   checkpoint_path=pred_model_path)
        output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            tf.logging.info("***** Predict results *****")
            for (i, prediction) in enumerate(result):
                sen_a_embedding = prediction["sen_a_embedding"]
                input_ids_a = prediction["input_ids_a"]
                keyword_probs_a = prediction["keyword_probs_a"]
                if not FLAGS.single_text:
                    sen_b_embedding = prediction["sen_b_embedding"]
                    input_ids_b = prediction["input_ids_b"]
                    keyword_probs_b = prediction["keyword_probs_b"]
                sorted_keyword_idx_a = np.argsort(-keyword_probs_a)
                extracted_keywords_a = []
                for idx in sorted_keyword_idx_a:
                    word_id = input_ids_a[idx]
                    word_prob = keyword_probs_a[idx]
                    word = tokenizer.convert_ids_to_tokens([word_id])[0]
                    extracted_keywords_a.append([word, word_prob])
                keyword_output_a = " ".join(
                    ["%s:%f" % (kw, prob) for kw, prob in extracted_keywords_a])
                text_output_a = " ".join(tokenizer.convert_ids_to_tokens(input_ids_a))
                writer.write("%s\t%s" % (keyword_output_a, text_output_a))
                writer.write("\n")
def format_timestamp(timestamp, fmt='YYYY-MM-DD HH:mm:ss'):
    from config import BaseConfig

    return arrow.get(timestamp).to(
        BaseConfig().config.get('TIMEZONE')).format(fmt)
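# Illustrative usage (hypothetical timestamp; assumes the config exposes a
# 'TIMEZONE' entry such as 'Asia/Shanghai'):
#
#   format_timestamp(1500000000)                 # -> '2017-07-14 10:40:00' in that zone
#   format_timestamp(1500000000, 'YYYY-MM-DD')   # -> '2017-07-14'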