def scrape_game(game):
    """Scrape the play-by-play table for one game page and persist its events.

    :param game: dict with at least ``'id'`` (our game id) and ``'fragment'``
        (URL path on baseball-reference.com).
    """
    print(game['id'])
    url = 'http://www.baseball-reference.com%s' % game['fragment']
    resp = requests.get(url)
    resp.raise_for_status()
    page = BeautifulSoup(resp.text, 'html.parser')
    links = all_links_from_bs(page)
    # The play-by-play table ships inside an HTML comment; strip the comment
    # markers before parsing it as real markup.
    pbp_html = _uncomment(str(page.find(id='all_play_by_play')))
    pbp = BeautifulSoup(pbp_html, 'html.parser')
    event_rows = [row for row in pbp.find_all('tr') if _is_event(row)]
    for row in event_rows:
        event = event_from_row(row)
        event['game_id'] = game['id']
        event = replace_names_with_player_ids(event, links)
        db.insert_row('events', event)
    # NOTE(review): original indentation was lost; a single commit after all
    # inserts is assumed here — confirm it was not per-row in the original.
    db.commit_with_retries()
def add_user(args: list):
    """Add a user to the subscriber table.

    :param args: ``[sid, email, sub_1, ..., sub_n]`` where each ``sub_i`` is
        the ``'0'``/``'1'`` subscription flag for one department table.
    :return: None
    """
    config = configparser.ConfigParser()
    config.read('NoticeReminder.ini', 'utf-8')
    department_table_names = config['Database']['DepartmentTableNames'].split(
        ',')
    n = len(department_table_names)
    if len(args) != 2 + n:  # 2 means sid and email, n means number of departments
        print('Missing arguments or too many arguments')
        return
    if not is_sid_correct(args[0]):
        print('SID incorrect, only can be 8-9 digits pure number')
        return
    # Loose sanity check of the e-mail format.
    if re.search(r'[0-9a-zA-Z_\-.]+@[0-9a-zA-Z]+\.[0-9a-zA-Z.]',
                 args[1]) is None:
        print('Email incorrect')
        return
    for arg in args[2:]:
        # BUG FIX: the old `arg.isdigit()` check accepted ANY digit string
        # (e.g. '7'); the flags must be exactly '0' or '1', as the error
        # message states.
        if arg not in ('0', '1'):
            print('Subscription argument incorrect, only can be 0 or 1')
            return
    # SECURITY NOTE(review): values are interpolated into the SQL fragment as
    # strings. sid and the flags are validated above, but the e-mail is only
    # loosely checked — prefer parameterized queries if db.insert_row ever
    # supports them.
    db.insert_row(config['Database']['DatabaseName'],
                  config['Database']['UserTableName'],
                  'sid, email, ' + ', '.join(department_table_names),
                  "'%s', '%s', " % (args[0], args[1]) + ', '.join(args[2:]))
    log_info = 'ADD USER: <%s> <%s> <' % (args[0], args[1]) + '> <'.join(
        args[2:]) + '>'
    logging.info(log_info)
    print(log_info)
def refine_pattern(data):
    """Refine a stored role pattern against one positive and several negative
    example matches, saving the first surviving variant back to the DB.

    :param data: dict with ``'pattern_id'``, ``'pos_match_id'``,
        ``'neg_match_ids'`` and an optional ``'feature_dict'``.
    """
    send('refine pattern request received')
    send('Loading pattern')
    pattern_id = data['pattern_id']
    feature_dict = data.get('feature_dict')
    if not feature_dict:
        feature_dict = DEFAULT_REFINE_PATTERN_FEATURE_DICT
    role_pattern = db.load_role_pattern(pattern_id)
    send('Loading matches')
    pos_match_id = data['pos_match_id']
    neg_match_ids = data['neg_match_ids']
    pos_match_row = db.fetch_row('matches', pos_match_id, return_type='dict')
    if not pos_match_row:
        emit('error', 'no row found for pos match id: {}'.format(pos_match_id))
        # BUG FIX: previously fell through and crashed dereferencing None.
        return
    neg_match_rows = db.fetch_rows('matches', neg_match_ids,
                                   return_type='dict')
    any_neg_missing = False
    for id_, row in zip(neg_match_ids, neg_match_rows):
        if not row:
            emit('error', 'no row found for neg match id: {}'.format(id_))
            any_neg_missing = True
    if any_neg_missing:
        # BUG FIX: previously continued and crashed on the missing row(s).
        return
    send('preparing training data')
    pos_match_sentence_id = pos_match_row['sentence_id']
    pos_match = json.loads(pos_match_row['data'])
    pos_match = db.spacify_match(pos_match, pos_match_sentence_id)
    neg_matches = []
    for neg_match_row in neg_match_rows:
        sentence_id = neg_match_row['sentence_id']
        neg_match = json.loads(neg_match_row['data'])
        neg_match = db.spacify_match(neg_match, sentence_id)
        neg_matches.append(neg_match)
    send('calculating pattern')
    # BUG FIX: a hard-coded feature_dict previously clobbered the one taken
    # from the request (or DEFAULT_REFINE_PATTERN_FEATURE_DICT) above,
    # making that logic dead code.
    role_pattern_builder = RolePatternBuilder(feature_dict)
    role_pattern_variants = list(role_pattern_builder.refine(
        role_pattern, pos_match, neg_matches))
    # Take the first variant that met the refinement criteria, if any.
    refined_pattern = role_pattern_variants[0] if role_pattern_variants else None
    if refined_pattern:
        send('success. saving pattern')
        pattern_row = {
            # NOTE(review): 'unamed' typo kept for data compatibility.
            'name': 'unamed_pattern',
            # BUG FIX: persist the refined pattern, not the original one.
            'role_pattern_instance': pickle.dumps(refined_pattern),
        }
        pattern_id = db.insert_row('patterns', pattern_row)
        send('pattern saved: {}'.format(pattern_id))
    else:
        send('pattern refinement unsuccessful')
    emit('refine_pattern_success')
# NOTE(review): fragment — this is the tail of a feature-dict literal whose
# opening (and the enclosing loop/function) lies outside this view; the
# original indentation was lost and is reconstructed flat here.
'TAG': 'tag_',
'_': {
    'valence': 'valence'
},
}
# Persist the rebuilt training match, keyed to its sentence, in the new DB.
new_training_match_row = {
    'sentence_id': corresponding_sentence_id,
    'data': json.dumps({
        'slots': new_training_match_slots,
        'feature_dict': training_match_feature_dict,
    }),
}
match_id = db.insert_row('matches', new_training_match_row,
                         db_path=new_db_path)
# Record progress so an interrupted run can resume without re-inserting.
progress['training_matches_inserted'].append(match_id)
util.write_progress(progress)
# Ask the pattern-building worker (over socket.io, presumably) to build a
# pattern from the match we just inserted.
sio.emit(
    'build_pattern',
    data={
        'pos_match_id': match_id,
        'feature_dict': training_match_feature_dict,
        'pattern_id': pattern_id,
    },
)
import db
from .sds011 import SDS011
from .tests import run_all_tests

# NOTE(review): the result of getLogger() was discarded in the original;
# kept for behavior, but module-level logging.* calls use the root logger.
logging.getLogger(__name__)

sensor = SDS011("/dev/ttyUSB0")
if sensor:
    run_all_tests(sensor)
    sensor.set_duty_cycle(1)

connection = db.create_connection(db.DB_NAME)
if connection is None:
    logging.error(
        f"DB [{db.DB_NAME}]: cannot create the database connection!")
    # BUG FIX: previously execution continued into `with None:` and crashed
    # with an AttributeError; bail out explicitly instead.
    raise SystemExit(1)
with connection:
    db.create_table(conn=connection, create_table_sql=db.SQL_CREATE_TABLE)
    # Poll the sensor forever, storing each valid PM reading as a row.
    while True:
        try:
            r = sensor.sender.read()
            if sensor.sender.is_valid_active_response(r):
                data = sensor.extract_pm_values(r)
                row = (data["pm10"], data["pm25"], data["time"])
                db.insert_row(conn=connection, row=row)
        except KeyboardInterrupt:
            exit("\nBye!")
# NOTE(review): fragment — `continue` targets an enclosing loop that starts
# outside this view, and the original nesting was lost: the first half
# (candidate collection) most likely belonged to an inner loop over match
# rows, with the checks below it running after that loop. Reconstruct the
# nesting against the original file before relying on this formatting.
match_data = json.loads(match_row['match_data'])
slots, match_tokens = match_data['slots'], match_data['match_tokens']
# Rebuild the match object from its stored JSON representation.
match = db.load_role_pattern_match(slots, match_tokens, match_sentence_id,
                                   db_path=new_db_path)
is_training_match = util.matches_are_equal(match, training_match)
if is_training_match:
    training_match_equivalent_candidates.append(match)
    training_match_equivalent_candidates_ids.append(match_id)
if not training_match_equivalent_candidates:
    print('Training match equivalent not found')
    continue
if len(training_match_equivalent_candidates) > 1:
    # Deliberately proceeds with the first candidate anyway.
    print('Multiple training match equivalents found')
    # pprint(training_match_equivalent_candidates)
    # continue
# Link the (first) equivalent match to the pattern as a positive example.
pattern_training_match_row = {
    'match_id': training_match_equivalent_candidates_ids[0],
    'pattern_id': pattern_id,
    'pos_or_neg': 'pos',
}
db.insert_row('pattern_training_matches', pattern_training_match_row,
              db_path=new_db_path)
# Record progress so an interrupted run can resume.
progress['pattern_ids_training_matches_mapped'].append(pattern_id)
util.write_progress(progress)
# Resume-able import of RolePattern pickles into the new database: patterns
# already recorded in the progress file are skipped on re-runs.
progress = util.read_progress()
n_patterns_to_insert = len(pattern_ids_to_insert)
for pattern_id in pattern_ids_to_insert:
    # (Dead check removed: the original tested
    # `pattern_id not in pattern_ids_to_insert` while iterating that very
    # list, which can never be true.)
    if pattern_id in progress['pattern_ids_inserted']:
        continue  # already inserted in a previous run
    print('pattern_id', pattern_id)
    # Load the RolePattern: prefer the pickle on disk, fall back to the DB.
    role_pattern_path = os.path.join(config['patterns_output_dir'],
                                     '{}.p'.format(pattern_id))
    try:
        with open(role_pattern_path, 'rb') as f:
            role_pattern = pickle.load(f)
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; any ordinary failure still falls back to the DB.
        role_pattern = db.load_role_pattern(pattern_id)
    token_labels = role_pattern.token_labels
    role_pattern_bytes = pickle.dumps(role_pattern)
    pattern_row = {
        'id': pattern_id,
        'role_pattern_instance': role_pattern_bytes,
        'data': json.dumps({'token_labels': token_labels}),
    }
    pattern_id = db.insert_row('patterns', pattern_row,
                               db_path=config['new_db_file_path'])
    progress['pattern_ids_inserted'].append(pattern_id)
    print(len(progress['pattern_ids_inserted']), '/', n_patterns_to_insert)
    # Persist progress after every pattern so interruption loses nothing.
    util.write_progress(progress)