def test_check_url(self):
    """check_url must accept every known-valid URL and reject every invalid one."""
    for candidate in self.valid_urls:
        self.assertTrue(utils.check_url(candidate),
                        "check valid urls as understood pass")
    for candidate in self.invalid_urls:
        self.assertFalse(utils.check_url(candidate),
                         "check invalid urls caught")
def create_reference_detail(request, **kwargs):
    # View wrapper helper: when a GET request carries a ?url= parameter,
    # resolve it and stash the parsed URL/title into extra_context so the
    # downstream view can pre-fill the reference form.
    if request.method == 'GET' and 'url' in request.GET:
        try:
            # check_url is expected to return a (url, title) pair and to
            # raise IOError when the URL cannot be fetched/validated
            # -- TODO confirm against its definition.
            url, title = check_url(request.GET['url'])
            kwargs.setdefault('extra_context', {}).update({'parsed_url': url, 'parsed_title': title,})
        except IOError, e:  # Python 2 except syntax; this module targets py2
            # Surface the failure to the user as a (translated) flash message.
            messages.error(request, _(e), fail_silently=True)
def check_arg_input(parser, args):
    """Validate the -i/--input argument against the loader that will handle it.

    Exits via parser.error() (argparse convention: prints usage and exits)
    on any problem; otherwise returns ``args.input`` unchanged.
    """
    if not args.input:
        parser.error("-i, --input INPUT is required.")
    loader = Loader.get_loader(args.input)
    if loader == FSLoader:
        # FIX: os.path.isfile was evaluated twice; merged into one branch.
        # Directories fall through: the loader validates their contents later.
        if os.path.isfile(args.input):
            if not is_a_supported_image_file_extension(args.input):
                parser.error("Input {} file not supported format.".format(
                    args.input))
            check_image_file_validity(args.input)
    elif loader == HTTPLoader:
        # FIX: typos in user-facing messages ("ressource", "accesible",
        # "is not file with").
        if not check_url(args.input):
            parser.error(
                "Url {} of the http resource doesn't exist or is not accessible."
                .format(args.input))
        if not is_a_supported_image_file_extension(args.input):
            parser.error(
                "Url {} is not a file with a supported extension format.".format(
                    args.input))
    else:
        parser.error(
            "Input {} is not a valid file or directory or url.".format(
                args.input))
    return args.input
def POST(self, userurlkey=None):
    """Create a new short URL (web.py handler).

    The request body is the target URL; *userurlkey* is an optional
    user-chosen key from the route. Returns a JSON payload with the key
    on success, or sets a 400/500-style status with a short message.
    """
    url = web.data()
    if len(url) > config.MAX_LEN_URL:
        web.ctx.status = "400 Bad request"
        return "url too long"
    if userurlkey is not None:
        # FIX: removed stray py2 debug statement `print userurlkey`.
        if len(userurlkey) < config.MIN_LEN_USERURLKEY:
            web.ctx.status = "400 Bad request"
            return "key too short"
        if len(userurlkey) > config.MAX_LEN_URLKEY:
            web.ctx.status = "400 Bad request"
            return "key too long"
        # FIX: str.lower(x) breaks on non-str input; use the bound method.
        userurlkey = utils.encode_string(userurlkey).lower()
    url = utils.encode_string(url)
    # check_url may return a non-bool sentinel -- keep the explicit compare.
    if utils.check_url(url) == False:
        web.ctx.status = "400 Bad request"
        return "bad url"
    ret, n_affected, urlkey = model.url_new(url, userurlkey)
    if ret != 0:
        return web.internalerror("db error")
    # FIX: `(n_affected==1 or True and False)` parsed as
    # `n_affected == 1 or False`, i.e. just `n_affected == 1`.
    retval = {"is_created": n_affected == 1, "key": urlkey}
    web.ctx.status = "200 OK"
    return json.dumps(retval)
def web_shorten(url):
    """Shorten *url*: validate it, reject duplicates, store a new 6-char code."""
    cleaned = url.strip()
    # Reject obviously-too-short input or anything the validator refuses.
    if len(cleaned) < 2 or utils.check_url(cleaned) == False:
        return no_url()
    connection = utils.create_connection("test.db")
    existing = utils.check_entry(cleaned, connection)
    stored_url = existing[1] if existing else False
    if stored_url and stored_url == cleaned:
        connection.close()
        return already_used()
    code = utils.make_key(6)
    today = utils.get_date()
    utils.new_entry(cleaned, code, today, today, connection)
    connection.close()
    return code
def shorten():
    """Flask endpoint: create a shortcode for the posted JSON body.

    Expects ``{"url": ..., "shortcode": optional}``. Returns the error
    responses from no_url()/already_used()/invalid_code() on failure, or a
    201 response whose body is the shortcode.
    """
    shortcode = ""
    if request.method == 'POST':
        received = request.get_json(force=True)
        # FIX: received["url"] raised KeyError (-> 500) when the key was
        # absent; .get() degrades to the normal "no url" error path.
        url = received.get("url") or ""
        if len(url) < 2 or utils.check_url(url) == False:
            return no_url()
        conn = utils.create_connection("test.db")
        check = utils.check_entry(url, conn)
        db_url = check[1] if check else False
        if db_url and db_url == url:
            conn.close()
            return already_used()
        try:
            shortcode = received["shortcode"]
        except KeyError:
            # FIX: logging.warn() is deprecated; use logging.warning().
            logging.warning("No shortcode provided, generating one...")
            shortcode = utils.make_key(6)
        if utils.check_shortcode(shortcode) == False:
            conn.close()
            return invalid_code()
        _date = utils.get_date()
        utils.new_entry(url, shortcode, _date, _date, conn)
        conn.close()
    return flask.make_response(shortcode, 201)
def shorten_url():
    """Take a JSON body ``{"url": ...}`` and return a shortened URL.

    Returns 201 with the shortened URL when the submitted URL is valid,
    otherwise a 400 with a 'Malformed URL' message.
    """
    # FIX: `url` was referenced in the 400 message even when the request was
    # not JSON, raising NameError; initialize it up front.
    url = None
    if request.is_json:
        url = request.get_json()["url"]
        if utils.check_url(utils.convert(url)):
            return jsonify({
                "shortened_url": '{}{}'.format(HOST, dal.add_url(url))
            }), status.HTTP_201_CREATED
    return 'Malformed URL: {}'.format(url), status.HTTP_400_BAD_REQUEST
def check_url(self, url):
    """Validate *url* via utils.check_url, falling back to self.default_url.

    Returns (to_add_url, change): the URL to use and True when the fallback
    was substituted for the requested one.
    """
    valid, code = utils.check_url(url)
    if not valid:
        logger.warning('slice ' + str(self.slice) + ' partition_num ' +
                       str(self.partition_num) + ' failed to get url ' + url)
        to_add_url = self.default_url
        change = True
    elif code == 200:
        logger.info('slice ' + str(self.slice) + ' partition_num ' +
                    str(self.partition_num) + ' code ' + str(code) + ' ' + url)
        to_add_url = url
        change = False
    else:
        # FIX: was logger.waring(...) -- AttributeError at runtime; also
        # restored the missing space before "failed".
        logger.warning('slice ' + str(self.slice) + ' partition_num ' +
                       str(self.partition_num) + ' failed to get url ' + url +
                       ' retcode ' + str(code))
        to_add_url = self.default_url
        change = True
    return to_add_url, change
def redirect_url(code):
    """Resolve *code* and redirect.

    If *code* decodes to a valid URL on its own, redirect straight to it;
    otherwise look the code up in the DB and redirect to the stored URL.
    Returns a 400 'Malformed URL' response when neither works.
    """
    if utils.check_url(utils.convert(code)):
        return redirect(utils.convert(code), code=302)
    # Single definition of the failure response (was duplicated three times).
    malformed = 'Malformed URL: {}{}'.format(HOST, code), status.HTTP_400_BAD_REQUEST
    try:
        result = dal.lookup_url(code)
    except Exception:
        # FIX: narrowed from a bare `except:` which also swallowed
        # SystemExit/KeyboardInterrupt.
        return malformed
    if result:
        return redirect(utils.convert(result), code=302)
    return malformed
def POST(self, urlkey):
    """Replace the URL stored under *urlkey*; the request body is the new URL."""
    target = web.data()
    if len(target) > config.MAX_LEN_URL:
        web.ctx.status = "400 Bad request"
        return "url too long"
    if urlkey is not None:
        if len(urlkey) > config.MAX_LEN_URLKEY:
            web.ctx.status = "400 Bad request"
            return "key too long"
        urlkey = str.lower(utils.encode_string(urlkey))
    target = utils.encode_string(target)
    # check_url may return a non-bool sentinel; keep the explicit compare.
    if utils.check_url(target) == False:
        web.ctx.status = "400 Bad request"
        return "bad url"
    ret, n_affected = model.url_modify(urlkey, target)
    if ret != 0:
        return web.internalerror("db error")
    return json.dumps({'n_affected': n_affected})
def download_data_from_url(url, task_id, base_url=None, depth=1):
    # Mirror the page at *url* into the folder for *task_id*, then recurse
    # over same-site links while depth > 0. Returns the folder path.
    folder = get_folder(task_id)
    if base_url is None:
        # Derive the site root (scheme + host) from the URL itself.
        base_url = re.search(r'((https|http)://[\w_\-.]+)', url)
        if not base_url:
            raise requests.exceptions.InvalidURL(
                f"This is not a valid URL: {url}")
        base_url = base_url.group(1)
    response = requests.get(url)
    # Prefer the charset declared inside the HTML over the HTTP header one;
    # the header encoding is used only when Content-Type carries a charset.
    http_encoding = response.encoding if 'charset' in response.headers.get(
        'content-type', '').lower() else None
    html_encoding = EncodingDetector.find_declared_encoding(response.content,
                                                           is_html=True)
    encoding = html_encoding or http_encoding
    soup = BeautifulSoup(response.content, from_encoding=encoding)
    with open(folder + 'index.html', 'w', encoding='utf-8') as f:
        f.write(response.text)
    # Fetch referenced assets next to the saved page.
    download_media(soup, folder, base_url)
    download_js(soup, folder, base_url)
    download_css(soup, folder, base_url)
    if depth > 0:
        # Normalize and de-duplicate outgoing links, keep only same-site ones
        # (check_url with base_url -- presumably a same-origin filter; verify).
        links = set(
            map(lambda x: transform_url(x, base_url), find_another_urls(soup)))
        for i, link in enumerate(
                filter(lambda x: check_url(x, base_url), links)):
            try:
                # Each child page goes into a numbered sub-task folder.
                download_data_from_url(link, "{0}/{1}".format(task_id, i),
                                       base_url=base_url, depth=depth - 1)
            except requests.exceptions.RequestException as e:
                logging.error(
                    f'Exception occurred while request to {url}\n {e}')
    return folder
def process_round_data(data):
    # Parse one round of AFL fixture JSON and create/refresh Match records
    # plus their replay Video records by probing known Telstra CDN URL
    # patterns. Python 2 module (`except Exception, e` below).
    j = json.loads(data)
    items = j['dataFeederResponse']['req2']['row']
    for item in items:
        try:
            date = datetime.datetime.strptime(item['LocalStartTime'],
                                              "%d/%m/%Y %I:%M:%S %p")
            year = int(date.year)
            short_year = date.strftime('%y')
            # NOTE: `round` and `type` shadow builtins throughout this body.
            round = int(item['RoundName'].split(" ")[-1])
            round_pad = "%02d" % round
            round_string = "RD%s" % round_pad
            round_short = round_string
            round_name = item['RoundName']
            home_id = item['HomeTeamReference'].lower()
            away_id = item['AwayTeamReference'].lower()
            afl_id = int(item['Id'])
            final = False
            # Set the game type
            if item['SeasonName'].find('Final') > -1:
                type = 'final'
                round_string = "FinalsW%d" % round
                round_short = "FW%d" % round
                final = True
            else:
                type = 'premiership'
            # Set the teams (resolve ids to display values via static table)
            for team in static.TEAMS:
                if team['id'] == home_id:
                    home_val = team['val']
                    home_name = team['name']
                if team['id'] == away_id:
                    away_val = team['val']
                    away_name = team['name']
            # Create a new match
            match = get_or_new_match(afl_id)
            match.hometeam = home_id
            match.awayteam = away_id
            match.date = date
            match.round = round
            match.type = type
            match.afl_id = afl_id
            match.thumbnail = '/img/thumb-match.jpg'
            match_videos = []
            # Pre-2011 regular-season games (rounds < 10) use the legacy
            # full-match URL scheme; everything else uses per-quarter URLs.
            if year < 2011 and round < 10 and not final:
                video_url_med = "http://pd.streaming.telstra.com/pd_afl0/OnDemand/%d/ON/iVideo/Premiership/%s/NV_%s_%sV%s_1M.mp4" % (year, round_string, round_string.title(), home_val, away_val)
                # check_url here appears to return an HTTP status code;
                # 404 means the stream does not exist -- TODO confirm.
                if utils.check_url(video_url_med) != 404:
                    video = get_or_new_video(video_url_med)
                    video.name = "%s %s %s v %s" % (round_string.title(), year, home_name, away_name)
                    video.thumbnail = "%sthumb/match-replay.jpg" % (settings.MEDIA_URL)
                    video.date = date
                    video.urls = []
                    # Test for low quality (172k stream)
                    video_low_qual = re.sub("1[mM][bB]{,1}.mp4", "172K.mp4", video_url_med)
                    if utils.check_url(video_low_qual) != 404:
                        print("Found low-res video for %s" % video.name)
                        video.urls.insert(static.QUAL_LOW, video_low_qual)
                    else:
                        video.urls.insert(static.QUAL_LOW, None)
                    # Just blindly insert the medium quality stream
                    video.urls.insert(static.QUAL_MED, video_url_med)
                    # Test for high quality (2Mb stream)
                    video_high_qual = re.sub("1[mM][bB]{,1}.mp4", "2M.mp4", video_url_med)
                    if utils.check_url(video_high_qual) != 404:
                        print("Found high-res video for %s" % video.name)
                        video.urls.insert(static.QUAL_HIGH, video_high_qual)
                    else:
                        video.urls.insert(static.QUAL_HIGH, None)
                    video = utils.tag_video(video, 'replay',)
                    print("Saving video: %s" % video)
                    video.save()
                    match_videos.append(video.pk)
            else:
                # Modern scheme: one video per quarter.
                for i, qtr in enumerate(['1st','2nd','3rd','4th']):
                    video_url_med = "http://bptvpd.ngcdn.telstra.com/pd_afl0/OnDemand/%d/ON/iVideo/Premiership/%s/AFL%s_%s_%s_vs_%s_%s_qr_full_1M.mp4" % (year, round_string, short_year, round_short.lower(), home_id, away_id, qtr)
                    if utils.check_url(video_url_med) != 404:
                        video = get_or_new_video(video_url_med)
                        if final:
                            video.name = "%s %s %s v %s (%s Qtr)" % (round_name, year, home_name, away_name, qtr)
                        else:
                            video.name = "%s %s %s v %s (%s Qtr)" % (round_string.title(), year, home_name, away_name, qtr)
                        video.thumbnail = "%sthumb/match-replay-%s-qtr.jpg" % (settings.MEDIA_URL, qtr)
                        video.date = date
                        video.urls = []
                        # Test for low quality (172k stream)
                        video_low_qual = re.sub("1[mM][bB]{,1}.mp4", "172K.mp4", video_url_med)
                        if utils.check_url(video_low_qual) != 404:
                            print("Found low-res video for %s" % video.name)
                            video.urls.insert(static.QUAL_LOW, video_low_qual)
                        # Just blindly insert the medium quality stream
                        video.urls.insert(static.QUAL_MED, video_url_med)
                        # Test for high quality (2Mb stream)
                        video_high_qual = re.sub("1[mM][bB]{,1}.mp4", "2M.mp4", video_url_med)
                        if utils.check_url(video_high_qual) != 404:
                            print("Found high-res video for %s" % video.name)
                            video.urls.insert(static.QUAL_HIGH, video_high_qual)
                        video = utils.tag_video(video, 'replay')
                        print("Saving video: %s" % video)
                        video.save()
                        match_videos.append(video.pk)
            match.videos = match_videos
            print("Saving match: %s" % match.get_title())
            r = match.save()
        except Exception, e:
            # One bad fixture row should not abort the whole round.
            logging.exception("Failed to parse match")
def test_check_url(self):
    """utils.check_url should report this known-good dataset URL as reachable."""
    target = 'http://workflow.isi.edu/MINT/FLDAS/FLDAS_NOAH01_A_EA_D.001/2019/04/FLDAS_NOAH01_A_EA_D.A20190401.001.nc'
    print('url: ', target)
    self.assertTrue(utils.check_url(target))
def download_pitch_data_only(args, lm=None):
    # Download per-pitch tracking data for every KBO game selected by *args*
    # into pbp_data/<year>/<month>/, writing per-game JSON/CSV plus rolled-up
    # month and year CSVs. *lm* is an optional project log manager.
    # Uses os.chdir throughout, so the process cwd is mutated and restored.
    # return True or False
    pdata_url = 'http://m.sports.naver.com/ajax/baseball/gamecenter/kbo/pitches.nhn'
    pdata_header_row = [
        'x0', 'y0', 'z0', 'vx0', 'vy0', 'vz0', 'ax', 'ay', 'az', 'plateX',
        'plateZ', 'crossPlateX', 'crossPlateY', 'topSz', 'bottomSz', 'stuff',
        'speed', 'pitcherName', 'batterName'
    ]
    game_ids = get_game_ids(args)
    if (game_ids is None) or (len(game_ids) == 0):
        print('no game ids')
        print('args: {}'.format(args))
        if lm is not None:
            lm.log('no game ids')
            lm.log('args: {}'.format(args))
        return False
    if lm is not None:
        lm.resetLogHandler()
        lm.setLogPath(os.getcwd())
        lm.setLogFileName('pitch_data_download_log.txt')
        lm.cleanLog()
        lm.createLogHandler()
        lm.log('---- Pitch Data Download Log ----')
    if not os.path.isdir('pbp_data'):
        os.mkdir('pbp_data')
    os.chdir('pbp_data')
    # path: pbp_data
    print("##################################################")
    print("###### DOWNLOAD PITCH DATA #######")
    print("##################################################")
    for year in game_ids.keys():
        start1 = time.time()
        print(" Year {}".format(year))
        if len(game_ids[year]) == 0:
            print('month id is empty')
            print('args: {}'.format(args))
            if lm is not None:
                lm.log('month id is empty')
                lm.log('args : {}'.format(args))
            return False
        if not os.path.isdir(str(year)):
            os.mkdir(str(year))
        os.chdir(str(year))
        # path: pbp_data/year
        year_fp = open(f'{year}_pdata.csv', 'w', newline='\n')
        year_cf = csv.writer(year_fp)
        year_cf.writerow(pdata_header_row)
        for month in game_ids[year].keys():
            start2 = time.time()
            print(" Month {}".format(month))
            if len(game_ids[year][month]) == 0:
                print('month id is empty')
                print('args: {}'.format(args))
                if lm is not None:
                    lm.log('month id is empty')
                    lm.log('args : {}'.format(args))
                return False
            if not os.path.isdir(str(month)):
                os.mkdir(str(month))
            os.chdir(str(month))
            # path: pbp_data/year/month
            month_fp = open(f'{year}_{month}_pdata.csv', 'w', newline='\n')
            month_cf = csv.writer(month_fp)
            month_cf.writerow(pdata_header_row)
            # download
            done = 0
            skipped = 0
            for game_id in game_ids[year][month]:
                # game_id layout appears to be YYYYMMDD + team codes -- the
                # slices below rely on that. Skip out-of-range / future /
                # pre-season / post-season / unknown-team games.
                if (int(game_id[:4]) < 2008) or (int(game_id[:4]) > datetime.datetime.now().year):
                    skipped += 1
                    continue
                if (int(game_id[:4]) == datetime.datetime.now().year) and (int(
                        game_id[4:8]) > int(
                        datetime.datetime.now().date().strftime('%m%d'))):
                    skipped += 1
                    continue
                if int(game_id[4:8]) < int(regular_start[game_id[:4]]):
                    skipped += 1
                    continue
                if int(game_id[4:8]) >= int(playoff_start[game_id[:4]]):
                    skipped += 1
                    continue
                if game_id[8:10] not in teams:
                    skipped += 1
                    continue
                if not check_url(pdata_url):
                    skipped += 1
                    if lm is not None:
                        lm.log('URL error : {}'.format(pdata_url))
                    continue
                # NOTE(review): bitwise & on booleans below (works, but `and`
                # was presumably intended). Today's games are always
                # re-downloaded; otherwise a non-empty existing file is skipped.
                if (int(game_id[:4]) == datetime.datetime.now().year) &\
                   (int(game_id[4:6]) == datetime.datetime.now().month) &\
                   (int(game_id[6:8]) == datetime.datetime.now().day):
                    # do nothing
                    done = done
                elif (os.path.isfile(game_id + '_pdata.json')) and \
                     (os.path.getsize(game_id + '_pdata.json') > 0):
                    done += 1
                    if lm is not None:
                        lm.log('File Duplicate : {}'.format(game_id))
                    continue
                params = {'gameId': game_id}
                headers = {
                    'User-Agent':
                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                    'Chrome/59.0.3071.115 Safari/537.36',
                    'X-Requested-With': 'XMLHttpRequest',
                    'Host': 'm.sports.naver.com',
                    'Referer':
                    'http://m.sports.naver.com/baseball/gamecenter/kbo/index.nhn?&gameId='
                    + game_id + '&tab=relay'
                }
                response = requests.get(pdata_url, params=params, headers=headers)
                if response is not None:
                    # load json structure
                    js = response.json()
                    if isinstance(js, str):
                        js = json.loads(js)
                        #js = ast.literal_eval(js)
                    # NOTE(review): lm.log below is called without the usual
                    # `lm is not None` guard -- crashes when lm is None.
                    if js is None:
                        lm.log('Pitch data missing : {}'.format(game_id))
                        skipped += 1
                        continue
                    elif len(js) == 0:
                        lm.log('Pitch data missing : {}'.format(game_id))
                        skipped += 1
                        continue
                    # json to pandas dataframe
                    #df = pd.read_json(json.dumps(js))
                    df = pd.DataFrame(js)
                    # calculate pitch location(px, pz)
                    # Solve the quadratic of motion for the time the ball
                    # crosses the plate, then evaluate x/z there.
                    t = -df['vy0'] - np.sqrt(df['vy0'] * df['vy0'] - 2 * df['ay'] * (df['y0'] - df['crossPlateY']))
                    t /= df['ay']
                    xp = df['x0'] + df['vx0'] * t + df['ax'] * t * t * 0.5
                    zp = df['z0'] + df['vz0'] * t + df['az'] * t * t * 0.5
                    df['plateX'] = np.round(xp, 5)
                    df['plateZ'] = np.round(zp, 5)
                    # calculate pitch movement(pfx_x, pfx_z)
                    # Compare the actual plate position with a drag/spin-free
                    # extrapolation from y=40; 32.174 is g in ft/s^2.
                    t40 = -df['vy0'] - np.sqrt(df['vy0'] * df['vy0'] - 2 * df['ay'] * (df['y0'] - 40))
                    t40 /= df['ay']
                    x40 = df['x0'] + df['vx0'] * t40 + 0.5 * df['ax'] * t40 * t40
                    vx40 = df['vx0'] + df['ax'] * t40
                    z40 = df['z0'] + df['vz0'] * t40 + 0.5 * df['az'] * t40 * t40
                    vz40 = df['vz0'] + df['az'] * t40
                    th = t - t40
                    x_no_air = x40 + vx40 * th
                    z_no_air = z40 + vz40 * th - 0.5 * 32.174 * th * th
                    df['pfx_x'] = np.round((xp - x_no_air) * 12, 5)
                    df['pfx_z'] = np.round((zp - z_no_air) * 12, 5)
                    # load back to json structure
                    dfjsstr = df.to_json(orient='records', force_ascii=False)
                    dfjs = json.loads(dfjsstr)
                    # dump to json file
                    fp = open(game_id + '_pdata.json', 'w', newline='\n')
                    json.dump(dfjs, fp, ensure_ascii=False, sort_keys=False, indent=4)
                    fp.close()
                    # dump to csv file (also mirrored into month/year CSVs)
                    fp = open(game_id + '_pdata.csv', 'w', newline='\n')
                    cf = csv.writer(fp)
                    cf.writerow(pdata_header_row)
                    for x in dfjs:
                        row = [
                            x['x0'], x['y0'], x['z0'], x['vx0'], x['vy0'],
                            x['vz0'], x['ax'], x['ay'], x['az'], x['plateX'],
                            x['plateZ'], x['crossPlateX'], x['crossPlateY'],
                            x['topSz'], x['bottomSz'], x['stuff'], x['speed'],
                            x['pitcherName'], x['batterName']
                        ]
                        month_cf.writerow(row)
                        year_cf.writerow(row)
                        cf.writerow(row)
                    fp.close()
                    done += 1
                else:
                    skipped += 1
                    if lm is not None:
                        lm.log('Cannot get response : {}'.format(game_id))
                print_progress(' Downloading: ', len(game_ids[year][month]), done, skipped)
            # download done
            print_progress(' Downloading: ', len(game_ids[year][month]), done, skipped)
            print('\n Downloaded {} files'.format(done))
            print(' (Skipped {} files)'.format(skipped))
            end2 = time.time()
            print(' -- elapsed {:.3f} sec for month {}'.format(
                end2 - start2, month))
            month_fp.close()
            os.chdir('..')
        # path: pbp_data/year
        end1 = time.time()
        print(' -- elapsed {:.3f} sec for year {}'.format(
            end1 - start1, year))
        # months done
        year_fp.close()
        os.chdir('..')
    # path: pbp_data/
    # years done
    os.chdir('..')
    # path: root
    return True
def download_relay(args, lm=None):
    # Download the pitch-by-pitch text relay (plus referee info) for every
    # KBO game selected by *args* into pbp_data/<year>/<month>/, writing one
    # JSON and two CSVs per game. *lm* is an optional project log manager.
    # Uses os.chdir throughout, so the process cwd is mutated and restored.
    # return True or False
    relay_url = 'http://m.sports.naver.com/ajax/baseball/gamecenter/kbo/relayText.nhn'
    record_url = 'http://m.sports.naver.com/ajax/baseball/gamecenter/kbo/record.nhn'
    game_ids = get_game_ids(args)
    if (game_ids is None) or (len(game_ids) == 0):
        print('no game ids')
        print('args: {}'.format(args))
        if lm is not None:
            lm.log('no game ids')
            lm.log('args: {}'.format(args))
        return False
    if lm is not None:
        lm.resetLogHandler()
        lm.setLogPath(os.getcwd())
        lm.setLogFileName('relay_download_log.txt')
        lm.cleanLog()
        lm.createLogHandler()
        lm.log('---- Relay Text Download Log ----')
    if not os.path.isdir('pbp_data'):
        os.mkdir('pbp_data')
    os.chdir('pbp_data')
    # path: pbp_data
    print("##################################################")
    print("###### DOWNLOAD RELAY DATA #######")
    print("##################################################")
    for year in game_ids.keys():
        start1 = time.time()
        print(" Year {}".format(year))
        if len(game_ids[year]) == 0:
            print('month id is empty')
            print('args: {}'.format(args))
            if lm is not None:
                lm.log('month id is empty')
                lm.log('args : {}'.format(args))
            return False
        if not os.path.isdir(str(year)):
            os.mkdir(str(year))
        os.chdir(str(year))
        # path: pbp_data/year
        for month in game_ids[year].keys():
            start2 = time.time()
            print(" Month {}".format(month))
            if len(game_ids[year][month]) == 0:
                print('month id is empty')
                print('args: {}'.format(args))
                if lm is not None:
                    lm.log('month id is empty')
                    lm.log('args : {}'.format(args))
                return False
            if not os.path.isdir(str(month)):
                os.mkdir(str(month))
            os.chdir(str(month))
            # path: pbp_data/year/month
            # download
            done = 0
            skipped = 0
            for game_id in game_ids[year][month]:
                # game_id layout appears to be YYYYMMDD + team codes; skip
                # out-of-range / future / pre-season / post-season / unknown
                # team games.
                if (int(game_id[:4]) < 2008) or (int(game_id[:4]) > 7777):
                    skipped += 1
                    continue
                if (int(game_id[:4]) == datetime.datetime.now().year) and (int(
                        game_id[4:8]) > int(
                        datetime.datetime.now().date().strftime('%m%d'))):
                    skipped += 1
                    continue
                if int(game_id[4:8]) < int(regular_start[game_id[:4]]):
                    skipped += 1
                    continue
                if int(game_id[4:8]) >= int(playoff_start[game_id[:4]]):
                    skipped += 1
                    continue
                if game_id[8:10] not in teams:
                    skipped += 1
                    continue
                if not check_url(relay_url):
                    skipped += 1
                    if lm is not None:
                        lm.log('URL error : {}'.format(relay_url))
                    continue
                # NOTE(review): bitwise & on booleans below (works, but `and`
                # was presumably intended). Today's games are always
                # re-downloaded; otherwise a non-empty existing file is skipped.
                if (int(game_id[:4]) == datetime.datetime.now().year) &\
                   (int(game_id[4:6]) == datetime.datetime.now().month) &\
                   (int(game_id[6:8]) == datetime.datetime.now().day):
                    # do nothing
                    done = done
                elif (os.path.isfile(game_id + '_relay.json')) and \
                     (os.path.getsize(game_id + '_relay.json') > 0):
                    done += 1
                    if lm is not None:
                        lm.log('File Duplicate : {}'.format(game_id))
                    continue
                # First request fetches half (inning page) 1; later innings
                # are fetched in the loop below.
                params = {'gameId': game_id, 'half': '1'}
                headers = {
                    'User-Agent':
                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                    'Chrome/59.0.3071.115 Safari/537.36',
                    'X-Requested-With': 'XMLHttpRequest',
                    'Host': 'm.sports.naver.com',
                    'Referer':
                    'http://m.sports.naver.com/baseball/gamecenter/kbo/index.nhn?&gameId='
                    + game_id + '&tab=relay'
                }
                response = requests.get(relay_url, params=params, headers=headers)
                if response is not None:
                    txt = {}
                    js = response.json()
                    if isinstance(js, str):
                        js = json.loads(js)
                    last_inning = js['currentInning']
                    # NOTE(review): lm.log here lacks the `lm is not None`
                    # guard used elsewhere -- crashes when lm is None.
                    if last_inning is None:
                        skipped += 1
                        lm.log('Gameday not found : {}'.format(game_id))
                        continue
                    # Index relay entries by their 'no' field.
                    txt['relayList'] = {}
                    for i in range(len(js['relayList'])):
                        txt['relayList'][js['relayList'][i]['no']] = js['relayList'][i]
                    txt['homeTeamLineUp'] = js['homeTeamLineUp']
                    txt['awayTeamLineUp'] = js['awayTeamLineUp']
                    txt['stadium'] = js['schedule']['stadium']
                    response.close()
                    for inn in range(2, last_inning + 1):
                        params = {'gameId': game_id, 'half': str(inn)}
                        response = requests.get(relay_url, params=params, headers=headers)
                        if response is not None:
                            js = response.json()
                            if isinstance(js, str):
                                js = json.loads(js)
                                #js = ast.literal_eval(js)
                            # BUGBUG
                            # Case: the relay text contains a non-unicode character.
                            # gameid : 20180717LGWO02018
                            # Offending text: \ufffd (REPLACEMENT CHARACTER) - cannot be stored as cp949
                            # Workaround: when a character cannot be encoded to cp949, replace the text with a blank.
                            for i in range(len(js['relayList'])):
                                txt['relayList'][js['relayList'][i]['no']] = js['relayList'][i]
                                texts = txt['relayList'][js['relayList'][i]['no']]['textOptionList']
                                for i in range(len(texts)):
                                    try:
                                        texts[i]['text'].encode('cp949')
                                    except UnicodeEncodeError:
                                        texts[i]['text'] = ''
                        else:
                            skipped += 1
                            if lm is not None:
                                lm.log('Cannot get response : {}'.format(game_id))
                        response.close()
                    # get referee
                    params = {'gameId': game_id}
                    headers = {
                        'User-Agent':
                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, '
                        'like Gecko) Chrome/59.0.3071.115 Safari/537.36',
                        'X-Requested-With': 'XMLHttpRequest',
                        'Host': 'm.sports.naver.com',
                        'Referer':
                        'http://m.sports.naver.com/baseball/gamecenter/kbo/index.nhn?gameId='
                        + game_id + '&tab=record'
                    }
                    response = requests.get(record_url, params=params, headers=headers)
                    # Scrape the referee name out of the page's etcRecords JSON.
                    p = regex.compile(
                        '(?<=\"etcRecords\":\[)[\\\.\{\}\"0-9:\s\(\)\,\ba-z가-힣\{\}]+'
                    )
                    result = p.findall(response.text)
                    if len(result) == 0:
                        txt['referee'] = ''
                    else:
                        txt['referee'] = result[0].split('{')[-1].split(
                            '":"')[1].split(' ')[0]
                    '''
                    p = regex.compile('stadiumName: \'\w+\'')
                    result = p.findall(response.text)
                    if len(result) == 0:
                        txt['stadium'] = ''
                    else:
                        txt['stadium'] = result[0].split('\'')[1]
                    '''
                    response.close()
                    fp = open(game_id + '_relay.json', 'w', newline='\n')
                    json.dump(txt, fp, ensure_ascii=False, sort_keys=False, indent=4)
                    fp.close()
                    ##### save the text only
                    text_list = []
                    pts_list = []
                    text_list_header = [
                        "textOrder", "textType", "text", "ptsPitchId", "stuff", "speed"
                    ]
                    pts_list_header = [
                        "textOrder", "inn", "ballcount", "crossPlateX", "topSz",
                        "crossPlateY", "pitchId", "vy0", "vz0", "vx0", "z0", "y0",
                        "ax", "x0", "ay", "az", "bottomSz", "stance"
                    ]
                    for k in sorted(txt['relayList'].keys()):
                        textset = txt['relayList'][k]
                        textOptionList = textset['textOptionList']
                        for to in textOptionList:
                            row = [k, to['type'], to['text']]
                            if 'ptsPitchId' in to.keys():
                                row.append(to['ptsPitchId'])
                            else:
                                row.append('')
                            if 'stuff' in to.keys():
                                row.append(to['stuff'])
                            else:
                                row.append('')
                            if 'speed' in to.keys():
                                row.append(to['speed'])
                            else:
                                row.append('')
                            text_list.append(row)
                        if 'ptsOptionList' in textset.keys():
                            ptsOptionList = textset['ptsOptionList']
                            for po in ptsOptionList:
                                row = [k] + list(po.values())
                                pts_list.append(row)
                    fp = open(game_id + '_textset.csv', 'w', newline='\n')
                    cf = csv.writer(fp)
                    cf.writerow(text_list_header)
                    for tl in text_list:
                        cf.writerow(tl)
                    fp.close()
                    fp = open(game_id + '_ptsset.csv', 'w', newline='\n')
                    cf = csv.writer(fp)
                    cf.writerow(pts_list_header)
                    for pl in pts_list:
                        cf.writerow(pl)
                    fp.close()
                    #####
                    done += 1
                else:
                    skipped += 1
                    if lm is not None:
                        lm.log('Cannot get response : {}'.format(game_id))
                print_progress(' Downloading: ', len(game_ids[year][month]), done, skipped)
            # download done
            print_progress(' Downloading: ', len(game_ids[year][month]), done, skipped)
            print('\n Downloaded {} files'.format(done))
            print(' (Skipped {} files)'.format(skipped))
            end2 = time.time()
            print(' -- elapsed {:.3f} sec for month {}'.format(
                end2 - start2, month))
            os.chdir('..')
        # path: pbp_data/year
        end1 = time.time()
        print(' -- elapsed {:.3f} sec for year {}'.format(
            end1 - start1, year))
        # months done
        os.chdir('..')
    # path: pbp_data/
    # years done
    os.chdir('..')
    # path: root
    return True
def callback():
    # Transactional closure (GAE ndb, Python 2 -- note `long`): apply the
    # POSTed edits to the Site entity *skey* and the Page entity *pkey*,
    # accumulating validation errors into the outer `r['errors']` list.
    # Returns [site, page] on success, None when the entities are missing
    # or mismatched. `sc`/`pc` track whether site/page need a put.
    s, p = ndb.get_multi([skey, pkey])
    sc, pc = False, False
    if not s or not p or p.key.parent() != s.key:
        return
    # Simple site attributes: each form field is the attribute name with a
    # leading underscore; skip unchanged values.
    for k in keys:
        v = self.request.POST.get('_%s' % k, None)
        if v is None or v == getattr(s, k):
            continue
        # `sm` maps social-link keys to a URL prefix + display name;
        # reject values whose resulting URL does not respond.
        if v and k in sm and not utils.check_url(sm[k]['url'] + v):
            r['errors'].append("%s URL doesn't seem to be working." % sm[k]['name'])
            continue
        setattr(s, k, v)
        sc = True
    if set_domain:
        s.domain = domain
        sc = True
    # 'pos' carries the page ordering as comma-separated 'p_<id>' tokens.
    pos = self.request.POST.get('pos')
    if pos:
        pages = []
        for p_pos in pos.split(','):
            if not p_pos.startswith('p_'):
                continue
            pages.append(ndb.Key('Page', long(p_pos[2:]), parent=skey))
        s.pages = pages
        sc = True
    if sc:
        s.put_async()
    # Page content slots: the spec dict says how many of each slot type the
    # page has; form fields are '_<slot>_<index>'.
    spec = p.spec()
    for i in range(spec.get('links', 0)):
        k = '_link_%i' % i
        if k in self.request.POST:
            p.links[i] = self.request.POST[k]
            pc = True
    for i in range(spec.get('text', 0)):
        k = '_text_%i' % i
        if k in self.request.POST:
            p.text[i] = self.request.POST[k]
            pc = True
    for i in range(spec.get('lines', 0)):
        k = '_line_%i' % i
        if k in self.request.POST:
            p.lines[i] = self.request.POST[k]
            pc = True
    for i in range(spec.get('maps', 0)):
        k = '_map_%i' % i
        if k in self.request.POST:
            p.maps[i] = self.request.POST[k]
            pc = True
    # Optional page rename, validated against the site's other page names.
    cm = self.request.POST.get('p_%s_name' % p.key.id())
    if cm:
        errors = models.Page.pagename_isvalid(s, cm)
        if cm.lower() != p.name_lower and errors:
            r['errors'].append(errors)
        else:
            p.name = cm
            pc = True
    elif 'current_menu' in self.request.POST:
        r['errors'].append('Page names may not be blank')
    # Gallery pages: 'gal' lists image ids; rebuild the image key list.
    if 'gal' in self.request.POST and p.type == models.PAGE_TYPE_GALLERY:
        p.images = []
        for i in self.request.POST.get('gal').split(','):
            if not i:
                continue
            imgid = long(i.partition('_')[2])
            p.images.append(ndb.Key('ImageBlob', imgid, parent=skey))
        pc = True
    if p.type == models.PAGE_TYPE_BLOG:
        # what if we have multiple puts on the same entity here? race condition?
        # Blog post fields arrive as '_post<field>_<blogpost-id>'.
        for k in self.request.POST.keys():
            value = None
            if k.startswith('_posttitle_'):
                name = 'title'
            elif k.startswith('_posttext_'):
                name = 'text'
            elif k.startswith('_postauthor_'):
                name = 'author'
            elif k.startswith('_postdate_'):
                name = 'date'
                d = self.request.POST[k].split('-')
                # NOTE(review): month is incremented (`int(d[1]) + 1`) --
                # presumably the client sends 0-based months; confirm.
                value = datetime.datetime(int(d[0]), int(d[1]) + 1, int(d[2]))
            elif k.startswith('_postdraft_'):
                name = 'draft'
                value = self.request.POST[k] == 'true'
            else:
                continue
            bpid = long(k.rpartition('_')[2])
            bp = models.BlogPost.get_by_id(bpid, parent=p.key)
            if value is None:
                value = self.request.POST[k]
            setattr(bp, name, value)
            bp.put_async()
    if pc:
        p.put_async()
    return [s, p]