def testRestoreTimeout(self):
    """Verifies the persistor will reattempt failed object store writes after a timeout.

    The batches are persisted while a failing store is installed for
    SERVER_LOG, a backup-directory verification is scheduled on the IOLoop,
    and the original store is then reinstated so the batches can be verified
    in the object store once the (shortened) restore interval has elapsed.
    """
    backup_dir = tempfile.mkdtemp()
    persistor = LogBatchPersistor(backup_dir=backup_dir)
    batches = [
        LogBatch('Log batch buffer 1A', ObjectStore.SERVER_LOG, 'test1', 'keyA'),
        LogBatch('Log batch buffer 2B', ObjectStore.SERVER_LOG, 'test2', 'keyB'),
        LogBatch('Log batch buffer 3C', ObjectStore.SERVER_LOG, 'test3', 'keyC'),
    ]

    # Shorten the restore interval so the test does not have to wait long.
    persistor._RESTORE_INTERVAL_SECS = 0.100

    # Swap in the "bad" object store which does nothing with puts, keeping
    # a handle on the good one so it can be put back below.
    good_store = ObjectStore.GetInstance(ObjectStore.SERVER_LOG)
    failing_store = _BadObjectStore(ObjectStore.SERVER_LOG_BUCKET,
                                    temporary=True, fail_fast=True)
    ObjectStore.SetInstance(ObjectStore.SERVER_LOG, failing_store)

    for log_batch in batches:
        persistor.PersistLogBatch(log_batch)

    verify_backup = partial(self._VerifyBackupBatches, backup_dir, batches)
    self.io_loop.add_callback(verify_backup)

    # Reinstate the "good" object store.
    ObjectStore.SetInstance(ObjectStore.SERVER_LOG, good_store)

    # Wait for twice the restore interval, then check the object store.
    self._RunAsync(self.io_loop.add_timeout, time.time() + 0.200)
    self._RunAsync(self._VerifyObjStoreBatches, batches)
def _wrapper(self, *args, **kwargs):
    """Disables automatic HTTP response completion on exit.

    Depending on the enclosing "datastore", "obj_store" and "log_store"
    flags, attaches the DB client, the PHOTO object store and the USER_LOG
    object store to the handler before invoking the wrapped "method" inside
    an exception barrier.
    """
    self._auto_finish = False

    if datastore:
        self._client = DBClient.Instance()

    if obj_store:
        self._obj_store = ObjectStore.GetInstance(ObjectStore.PHOTO)

    if log_store:
        self._log_store = ObjectStore.GetInstance(ObjectStore.USER_LOG)

    # Exceptions raised by the wrapped method are routed to the handler's
    # stack-context exception hook.
    on_exception = self._stack_context_handle_exception
    with util.ExceptionBarrier(on_exception):
        return method(self, *args, **kwargs)
def RunOnce(callback):
    """Get list of files and call processing function.

    Lists the raw log files, subtracts those already recorded in the
    processed-files registry, processes (at most --max_files_to_process of)
    the remainder and, unless --dry_run is set, writes the updated registry.
    Invokes "callback" with no arguments when done.
    """
    opts = options.options
    dry_run = opts.dry_run
    logs_paths = logs_util.ServerLogsPaths('viewfinder', 'full')

    if dry_run:
        logging.warning('dry_run=True: will not upload processed logs files or update registry')

    logs_store = ObjectStore.GetInstance(logs_paths.SOURCE_LOGS_BUCKET)
    merged_store = ObjectStore.GetInstance(logs_paths.MERGED_LOGS_BUCKET)

    # Raw log files currently in the source bucket.
    files = yield gen.Task(GetRawLogsFileList, logs_store, logs_paths, opts.start_date)

    # Files already processed, according to the registry.
    processed_files = yield gen.Task(logs_util.GetRegistry, merged_store,
                                     logs_paths.ProcessedRegistryPath())
    if processed_files is None:
        # None means: registry does not exist. All other errors throw exceptions.
        processed_files = []

    # Raw files not yet processed; sorting by filename sorts by date.
    missing_files = sorted(set(files).difference(set(processed_files)))

    to_process = missing_files
    if opts.max_files_to_process is not None:
        to_process = missing_files[:opts.max_files_to_process]

    logging.info('found %d raw files and %d processed files, %d missing. Will process %d.' %
                 (len(files), len(processed_files), len(missing_files), len(to_process)))
    if not missing_files:
        logging.info('No raw logs files to process.')
        callback()
        return

    merged_files = yield gen.Task(ProcessFiles, logs_store, merged_store, logs_paths,
                                  to_process, dry_run)
    logging.info('found %d raw files and %d processed files, %d missing, successfully processed %d' %
                 (len(files), len(processed_files), len(missing_files), len(merged_files)))

    # Add processed files to registry and write to S3.
    # TODO(marc): any failure in merged log upload or registry upload will cause us to get out of sync.
    # To fix this, we should also have a list of properly applied processed logs.
    processed_files.extend(merged_files)
    processed_files.sort()
    if not dry_run:
        yield gen.Task(retry.CallWithRetryAsync, kS3UploadRetryPolicy, logs_util.WriteRegistry,
                       merged_store, logs_paths.ProcessedRegistryPath(), processed_files)

    callback()
def RunOnce(client, job, callback):
    """Get list of files and call processing function.

    With --smart_scan, the start date is derived from the last successful
    run recorded by "job"; the run is skipped (callback(None)) when there is
    no such run or when it started less than --hours_between_runs ago.
    Otherwise "callback" receives the value returned by ProcessFiles.
    """
    opts = options.options
    logs_paths = logs_util.ServerLogsPaths('viewfinder', 'full')
    merged_store = ObjectStore.GetInstance(logs_paths.MERGED_LOGS_BUCKET)

    start_date = opts.start_date
    if opts.smart_scan:
        # Search for successful full-scan run in the last week.
        last_run = yield gen.Task(job.FindLastSuccess, with_payload_key='stats.last_day')
        if last_run is None:
            logging.info('No previous successful scan found, rerun with --start_date')
            callback(None)
            return

        last_run_start = last_run['start_time']
        too_recent = util.HoursSince(last_run_start) < opts.hours_between_runs
        if too_recent:
            logging.info('Last successful run started at %s, less than %d hours ago; skipping.' %
                         (time.asctime(time.localtime(last_run_start)), opts.hours_between_runs))
            callback(None)
            return

        last_day = last_run['stats.last_day']
        # Start one day before the last processed day, in case some logs were pushed to S3 late.
        # This really recomputes two days (the last day that was successfully processed and the
        # one prior).
        last_day_noon = util.ISO8601ToUTCTimestamp(last_day, hour=12)
        start_time = last_day_noon - constants.SECONDS_PER_DAY
        start_date = util.TimestampUTCToISO8601(start_time)
        logging.info('Last successful analyze_logs run (%s) scanned up to %s, setting analysis start date to %s' %
                     (time.asctime(time.localtime(last_run_start)), last_day, start_date))

    # Fetch list of merged logs, then process them.
    files = yield gen.Task(GetMergedLogsFileList, merged_store, logs_paths, start_date)
    last_day = yield gen.Task(ProcessFiles, merged_store, logs_paths, files)
    callback(last_day)
def List(args, callback): """List buckets/files/directories.""" assert len(args) <= 1 if len(args) == 0: # We intentionally ignore -R when listing buckets, we don't want to traverse all of S3. for b in store_utils.ListBuckets(): print '%s/' % b else: pattern = args[0] res = store_utils.ParseFullPath(pattern) if not res: logging.warning('%s is not in a registered bucket' % pattern) else: bucket, path = res store = ObjectStore.GetInstance(bucket) if options.options.R: files = yield gen.Task(store_utils.ListRecursively, store, path) else: subdirs, files = yield gen.Task(store_utils.ListFilesAndDirs, store, path) for d in subdirs: print '%s/%s' % (bucket, d) for d in files: print '%s/%s' % (bucket, d) callback()
def ListClientLogs(cls, user_id, start_timestamp, end_timestamp, filter, callback):
    """Lists the client logs stored for "user_id" between two timestamps.

    One key listing is issued against the USER_LOG object store for each
    ISO date from "start_timestamp" to "end_timestamp" (stepping one day at
    a time; the date of "end_timestamp" is always included). Keys are kept
    only if they match the regular expression "filter"; an empty or None
    "filter" keeps every key. Invokes "callback" with an array of
    {filename, url} objects.
    """
    obj_store = ObjectStore.GetInstance(ObjectStore.USER_LOG)

    def _OnListDates(date_listings):
        """Assemble {filename, url} objects from the per-date listings."""
        key_re = re.compile(filter or '.*')
        results = []
        for logs in date_listings:
            for key in logs:
                if key_re.search(key):
                    results.append({'filename': key, 'url': obj_store.GenerateUrl(key)})
        callback(results)

    with util.ArrayBarrier(_OnListDates) as b:
        # Collect the distinct dates covered by the range; a set is used
        # because the end date may coincide with the last stepped date.
        iso_dates = set()
        day_start = start_timestamp
        while day_start < end_timestamp:
            iso_dates.add(ClientLog._IsoDate(day_start))
            day_start += constants.SECONDS_PER_DAY
        iso_dates.add(ClientLog._IsoDate(end_timestamp))

        for iso_date in sorted(iso_dates):
            prefix = ClientLog._LogKeyPrefix(user_id, iso_date)
            ClientLog._ListAllKeys(obj_store, prefix, b.Callback())
def Grep(args, callback): """Grep a set of files.""" assert len(args) >= 2 pattern = re.compile(args[0]) files = args[1:] bucket = store = None for f in files: # Parse file name and extract bucket and relative path. resolved = store_utils.ParseFullPath(f) assert resolved is not None, 'Cannot determine bucket from %s' % f b, path = resolved assert bucket is None or bucket == b, 'Input files must all be in the same bucket' if store is None: # Initialize ObjectStore for this bucket. bucket = b store = ObjectStore.GetInstance(bucket) # Read file and iterate over each line. contents = yield gen.Task(store_utils.GetFileContents, store, path) for line in contents.split('\n'): if pattern.search(line): print '%s:%s' % (f, line) callback()
def _UploadWelcomePhotos(http_client, client, user, upload_request):
    """Uploads a set of photos that will be used in the new user welcome conversation.

    These photos are uploaded to the given user account. "upload_request" is
    in the UPLOAD_EPISODE_REQUEST format in json_schema.py, except:

      1. Activity, episode, and photo ids are added by this method.
      2. Each photo dict must contain an additional "name" field which gives
         the start of the filename of a jpg file in the
         backend/resources/welcome directory. Three files must exist there,
         in this format: <name>_full.jpg, <name>_med.jpg, <name>_tn.jpg.

    Raises Exception if any photo PUT does not return HTTP 200.
    """
    obj_store = ObjectStore.GetInstance(ObjectStore.PHOTO)
    welcome_path = os.path.join(ResourcesManager.Instance().resources_path, 'welcome')

    # Set the ids of all activities, episodes, and photos in the welcome conversation.
    yield _SetWelcomeIds(user, upload_request)

    # Work on a private snapshot from here on, so the ids just assigned are
    # the ones used for the rest of this call.
    upload_request = deepcopy(upload_request)

    # Directly call the service API in order to upload the photo metadata.
    # The "name" fields are stripped from a second copy, since UploadEpisode
    # chokes on them, while the snapshot keeps them for locating the files.
    upload_request_copy = deepcopy(upload_request)
    for ph_dict in upload_request_copy['photos']:
        ph_dict.pop('name')

    upload_response = yield UploadEpisode(client, obj_store, user.user_id,
                                          user.webapp_dev_id, upload_request_copy)

    # Upload photo to blob store (in various formats).
    for request_ph_dict, response_ph_dict in zip(upload_request['photos'],
                                                 upload_response['photos']):
        for fmt in ('full', 'med', 'tn'):
            # Get the photo bits from disk. JPEG data is binary, so open in
            # 'rb'; the context manager closes the file even if read() fails.
            image_path = os.path.join(welcome_path,
                                      '%s_%s.jpg' % (request_ph_dict['name'], fmt))
            with open(image_path, 'rb') as f:
                image_data = f.read()

            photo_url = response_ph_dict[fmt + '_put_url']
            content_md5 = base64.b64encode(request_ph_dict[fmt + '_md5'].decode('hex'))
            headers = {'Content-Type': 'image/jpeg', 'Content-MD5': content_md5}

            # Certificate validation is skipped when the file object store is in use.
            validate_cert = not options.options.fileobjstore
            response = yield gen.Task(http_client.fetch,
                                      photo_url,
                                      method='PUT',
                                      body=image_data,
                                      follow_redirects=False,
                                      validate_cert=validate_cert,
                                      headers=headers)
            if response.code != 200:
                raise Exception(
                    'Cannot upload photo "%s". HTTP error code %d. Is server running and accessible?' %
                    (request_ph_dict['photo_id'], response.code))
def __init__(self, client, user_id, email):
    """Records the target user and derives the stores and paths used to build the archive."""
    super(BuildArchiveOperation, self).__init__(client)
    self._user_id = user_id
    self._email = email
    self._notify_timestamp = self._op.timestamp

    # Object stores: photos are read from one, zips go to the other.
    self._photo_obj_store = ObjectStore.GetInstance(ObjectStore.PHOTO)
    self._user_zips_obj_store = ObjectStore.GetInstance(ObjectStore.USER_ZIPS)

    resources = ResourcesManager.Instance()
    self._offboarding_assets_dir_path = resources.GetOffboardingPath()

    # Everything else lives under <viewfinder temp dir>/<offboarding dir>.
    op_cls = BuildArchiveOperation
    temp_root = ServerEnvironment.GetViewfinderTempDirPath()
    self._temp_dir_path = os.path.join(temp_root, op_cls._OFFBOARDING_DIR_NAME)
    self._zip_file_path = os.path.join(self._temp_dir_path, op_cls._ZIP_FILE_NAME)
    self._content_dir_path = os.path.join(self._temp_dir_path, op_cls._CONTENT_DIR_NAME)
    self._data_dir_path = os.path.join(self._content_dir_path, CONVO_FOLDER_NAME)
def Benchmark(args, callback): """Run read or write benchmark against S3.""" assert len(args) == 1 res = store_utils.ParseFullPath(args[0]) assert res is not None, 'Test dir is not part of a registered bucket' bucket, test_dir = res is_read = options.options.bench_read num_iterations = options.options.bench_iterations test_data = [] for i in options.options.bench_size_powers: size = 2**i name = os.path.join(test_dir, '%.10d' % size) data = os.urandom(size) test_data.append((size, name, data)) store = ObjectStore.GetInstance(bucket) if is_read: logging.info('Preparing test files') yield [ gen.Task(store.Put, filename, data) for _, filename, data in test_data ] test_type = 'read' if is_read else 'write' print 'Running %s test with %d iterations per size' % (test_type, num_iterations) for fsize, fname, data in test_data: sys.stdout.write('%s %d bytes: ' % (test_type, fsize)) start = time.time() total_size = 0 tasks = [] for i in xrange(num_iterations): if is_read: tasks.append(gen.Task(store.Get, fname)) else: tasks.append(gen.Task(store.Put, fname, data)) total_size += fsize if options.options.bench_parallel: yield tasks else: for t in tasks: yield t end = time.time() delta = end - start speed = total_size / delta / 1024 sys.stdout.write('%.2fs, %.2fkB/s\n' % (delta, speed)) logging.info('Cleaning up test files') yield [gen.Task(store.Delete, filename) for _, filename, _ in test_data] callback()
def __init__(self, apple_id, password, vendor_id, html_retries=3):
    """Stores the credentials and options, and looks up the object store.

    "html_retries" is stored as given; it defaults to 3.
    """
    self._apple_id = apple_id
    self._password = password
    self._vendor_id = vendor_id
    # Honor the caller-supplied value; this was previously hard-coded to 3,
    # which silently ignored the "html_retries" argument.
    self._html_retries = html_retries

    # Initialized to None here; not populated in this method.
    self._form_fields = None
    self._available_days = None
    self._available_weeks = None

    self._object_store = ObjectStore.GetInstance(kS3Bucket)
def RunOnce(client, callback):
    """Processes every configured analysis interval, then uploads and reports.

    Each interval in --analysis_intervals_days is processed in turn (not in
    parallel); the resulting filenames are uploaded to the SERVER_DATA
    object store and passed to SendReport. Invokes "callback" when done.
    """
    object_store = ObjectStore.GetInstance(ObjectStore.SERVER_DATA)

    # Maps interval length in days -> filename returned for that interval.
    filenames = {}
    for num_days in options.options.analysis_intervals_days:
        filenames[num_days] = yield gen.Task(ProcessOneInterval, client, num_days)

    yield gen.Task(UploadFiles, object_store, filenames.values())
    yield gen.Task(SendReport, object_store, filenames)
    callback()
def HandleCrashes(client_store, user_id, raw_files, callback):
    """Copies a user's not-yet-merged crash files from the raw to the merged store.

    Raw files whose parsed path already appears among the merged files are
    skipped, as are files whose path cannot be parsed. With --dry_run
    nothing is copied. "client_store" is not used by this function.
    Invokes "callback" when done.
    """
    logs_paths = logs_util.UserCrashLogsPaths(user_id)
    raw_store = ObjectStore.GetInstance(logs_paths.SOURCE_LOGS_BUCKET)
    merged_store = ObjectStore.GetInstance(logs_paths.MERGED_LOGS_BUCKET)

    # List everything that has already been processed.
    base_path = logs_paths.MergedDirectory()
    existing_files = yield gen.Task(store_utils.ListAllKeys, merged_store,
                                    prefix=base_path, marker=None)
    done_files = set()
    for existing in existing_files:
        parsed = logs_paths.ParseMergedLogPath(existing)
        if parsed:
            done_files.add(parsed)

    # Keep the raw files that parse and have not been merged yet.
    to_copy = []
    for raw_file in raw_files:
        parsed = logs_paths.ParseRawLogPath(raw_file)
        if parsed and parsed not in done_files:
            to_copy.append(parsed)

    if to_copy:
        logging.info('User %s: %d crash files' % (user_id, len(to_copy)))

    if options.options.dry_run:
        callback()
        return

    @gen.engine
    def _CopyFile(source_parsed, callback):
        """Reads one raw file and writes it under the merged directory."""
        user, date, fname = source_parsed
        src_file = os.path.join(logs_paths.RawDirectory(), date, fname)
        dst_file = os.path.join(logs_paths.MergedDirectory(), date, fname)
        contents = yield gen.Task(raw_store.Get, src_file)
        yield gen.Task(merged_store.Put, dst_file, contents)
        callback()

    # All copies are issued together.
    yield [gen.Task(_CopyFile, parsed) for parsed in to_copy]
    callback()
def _CreateExpectedPhotos(validator, user_id, device_id, episode_id, limit=None, start_key=None):
    """Return the expected photo metadata for the episode with id "episode_id".

    Returns a (photo_dicts, last_key) tuple, where "photo_dicts" holds one
    dict per queried post and "last_key" is the photo id of the last post
    (None if there were no posts). "device_id" is not used.
    """
    posts = validator.QueryModelObjects(Post, episode_id, limit=limit, start_key=start_key)

    photo_dicts = []
    for post in posts:
        # The result is unused, but the call is kept as in the original.
        post._asdict()

        photo = validator.GetModelObject(Photo, post.photo_id)
        photo_dict = photo._asdict()
        for hidden_field in ('share_seq', 'client_data'):
            photo_dict.pop(hidden_field, None)

        # Do not return access URLs for posts which have been removed.
        if not post.IsRemoved():
            _AddPhotoUrls(ObjectStore.GetInstance(ObjectStore.PHOTO), photo_dict)

        # Asset keys come from the per-user photo record, when there is one.
        asset_keys = set()
        user_photo = validator.GetModelObject(UserPhoto, DBKey(user_id, post.photo_id),
                                              must_exist=False)
        if user_photo is not None and user_photo.asset_keys:
            asset_keys.update(user_photo.asset_keys)
        if asset_keys:
            photo_dict['asset_keys'] = list(asset_keys)

        photo_dicts.append(photo_dict)

        # Union together post labels and (if present) user_post labels.
        post_id = Post.ConstructPostId(episode_id, post.photo_id)
        user_post = validator.GetModelObject(UserPost, DBKey(user_id, post_id),
                                             must_exist=False)
        labels = post.labels.combine()
        if user_post is not None:
            labels = labels.union(user_post.labels.combine())
        if len(labels) > 0:
            photo_dict['labels'] = list(labels)

    if len(posts) > 0:
        last_key = posts[-1].photo_id
    else:
        last_key = None
    return (photo_dicts, last_key)
def _CreateFormats(): """Used to set up initial photos.""" obj_store = ObjectStore.GetInstance(ObjectStore.PHOTO) client = DBClient.Instance() http_client = AsyncHTTPClient() for photo_id, name in [('pgAZn77bJ-Kc', 'beach_c4'), ('pgAzpz7bJ-Mc', 'beach_a1'), ('pgB-Fh7bJ-Mg', 'beach_a2'), ('pgAzo67bJ-MV', 'beach_a3'), ('pgB-pj7bJ-Mo', 'beach_a4'), ('pgAvIa7bJ-MN', 'beach_b1'), ('pgAuoQ7bJ-MF', 'beach_b2'), ('pgAtwd7bJ-M7', 'beach_b3'), ('pgAaOJ7bJ-Kw', 'beach_c1'), ('pgA_vm7bJ-Ko', 'beach_c2'), ('pgAZna7bJ-Kk', 'beach_c3'), ('pgAW0x7bJ-KV', 'beach_d1'), ('pgAUMm7bJ-KN', 'beach_d2'), ('pfYwYR7bJ-KJ', 'party_1'), ('pfYwTk7bJ-KF', 'party_2'), ('pfYwSo7bJ-K7', 'party_3'), ('pfYw0g7bJ-K-', 'party_4'), ('pfYvoK7bJ-Jw', 'party_5'), ('pfYvhI7bJ-Jo', 'party_6'), ('prHKwa7bJ-N30', 'gone_fishing_1'), ('prBUtl7bJ-Mw', 'gone_fishing_2'), ('pfSb0S7bJ-Jk', 'street_art_1'), ('pfSasJ7bJ-Jc', 'street_art_2')]: photo = yield Photo.Query(client, photo_id, None) photo_dict = photo._asdict() photo_dict['name'] = name del photo_dict['photo_id'] del photo_dict['user_id'] del photo_dict['_version'] del photo_dict['episode_id'] print json.dumps(photo_dict, indent=True) for suffix, format in [('.f', 'full'), ('.m', 'med'), ('.t', 'tn')]: url = obj_store.GenerateUrl('%s%s' % (photo_id, suffix)) response = yield http_client.fetch(url, method='GET') welcome_path = os.path.join( ResourcesManager.Instance().resources_path, 'welcome') f = open( os.path.join(welcome_path, '%s_%s.jpg' % (name, format)), 'w') f.write(response.body) f.close()
def _MakeViewpointDict(followed):
    """Create a viewpoint dict from the followed object plus its referenced
    viewpoint object.

    Photo URLs are added to the cover photo only when the follower can view
    content and the metadata actually contains a cover photo.
    """
    viewpoint = validator.GetModelObject(Viewpoint, followed.viewpoint_id)
    follower_key = DBKey(followed.user_id, followed.viewpoint_id)
    follower = validator.GetModelObject(Follower, follower_key)

    metadata_dict = viewpoint.MakeMetadataDict(follower)
    if not (follower.CanViewContent() and 'cover_photo' in metadata_dict):
        return metadata_dict

    obj_store = ObjectStore.GetInstance(ObjectStore.PHOTO)
    _AddPhotoUrls(obj_store, metadata_dict['cover_photo'])
    return metadata_dict
def _PersistToObjStore(self, batch, restore=False):
    """Writes the given log batch to the object store.

    The 'restore' parameter indicates that this is an attempt to restore the
    log, in which case we do not rewrite it to backup on a subsequent
    failure; on success of a restore, the local backup file is deleted.

    Once the put has finished (successfully or not), if no other batches
    remain in flight, every callback waiting on a pending flush is invoked.
    """
    batch_key = batch.Key()

    def _ProcessWaitCallbacks():
        # If this was the last currently uploading ('in-flight') log batch,
        # invokes any callbacks waiting on the persistor to finish. This
        # functionality is only intended for testing.
        del self._in_flight[batch_key]
        if self._in_flight:
            return
        while self._wait_callbacks:
            waiter = self._wait_callbacks.popleft()
            waiter()

    def _OnPut():
        logging.info('Successfully persisted log batch %s to object store' % batch_key)
        # Delete the local backup file if this was a restore attempt.
        if restore:
            os.unlink(self._BackupFileName(batch))
        _ProcessWaitCallbacks()

    def _OnError(type, value, tb):
        logging.error('Failed to put log batch %s to object store' % batch_key,
                      exc_info=(type, value, tb))
        # If this was the original attempt to upload a batch, save to local backup.
        if not restore:
            self._PersistToBackup(batch)
        _ProcessWaitCallbacks()

    # Because this can be called during a process-exit scenario with no
    # IOLoop available, check for one before using a barrier. Without an
    # IOLoop the batch goes straight to backup, which does not require one.
    if IOLoop.current() is None:
        self._PersistToBackup(batch)
        return

    # Add this batch to the 'in-flight' collection. This helps track any
    # outstanding S3 requests, which is important if the persistor is closed
    # with outstanding batches remaining.
    self._in_flight[batch_key] = batch
    with util.Barrier(_OnPut, on_exception=_OnError) as b:
        store = ObjectStore.GetInstance(batch.store_name)
        store.Put(batch_key, batch.buffer, callback=b.Callback())
def Start(callback): assert options.options.apple or options.options.testflight, \ 'You must specify exactly one of --apple or --testflight' assert options.options.search_days > 0 # Exceptions are surfaced from both file parsers. if options.options.apple: assert not options.options.testflight, 'You must specify exactly one of --apple or --testflight' devices = AppleProvisioningProfile(options.options.apple).Devices() else: devices = TestFlightDevices(options.options.testflight).Devices() logs_paths = logs_util.ServerLogsPaths('viewfinder', 'full') merged_store = ObjectStore.GetInstance(logs_paths.MERGED_LOGS_BUCKET) # +1 because the start_date is exclusive. start_time = time.time() - (options.options.search_days + 1) * constants.SECONDS_PER_DAY start_date = util.TimestampUTCToISO8601(start_time) files = yield GetFileList(merged_store, start_date) logging.info('Looking for %d devices UDIDs in %d files' % (len(devices), len(files))) last_seen = yield GetUDIDTimestamps(merged_store, files) missing = [] by_age = [] valid = [] now = time.time() for d in devices: if d not in last_seen: missing.append(d) else: age = (now - last_seen[d]) / constants.SECONDS_PER_DAY if age > options.options.inactive_days: by_age.append((age, d)) else: valid.append(d) by_age.sort() print 'Devices still active: %d' % len(valid) print 'Devices not seen in %d days: %d' % (options.options.search_days, len(missing)) if missing: print ' ' + '\n '.join(missing) print 'Inactive devices (and days since last seen): %d' % len(by_age) for (age, device) in by_age: print ' %3d %s' % (age, device) callback()
def GetPutUrl(cls, user_id, device_id, timestamp, client_log_id,
              content_type=CLIENT_LOG_CONTENT_TYPE, content_md5=None, max_bytes=10 << 20):
    """Returns a URL for the client to write device logs to S3.

    The key is "<log key prefix for user and date>/dev-<device_id>-<client_log_id>".
    The URL expires after one day and expects the given content type
    (CLIENT_LOG_CONTENT_TYPE by default).
    """
    prefix = ClientLog._LogKeyPrefix(user_id, ClientLog._IsoDate(timestamp))
    key = '%s/dev-%d-%s' % (prefix, device_id, client_log_id)

    user_log_store = ObjectStore.GetInstance(ObjectStore.USER_LOG)
    return user_log_store.GenerateUploadUrl(key,
                                            content_type=content_type,
                                            content_md5=content_md5,
                                            expires_in=constants.SECONDS_PER_DAY,
                                            max_bytes=max_bytes)
def _StartWWW(run_callback, scan_ops):
    """Starts services necessary for operating in the Viewfinder WWW server
    environment. Invokes 'run_callback' asynchronously.
    """
    opts = options.options
    client = db_client.DBClient.Instance()

    # Log emails and texts to the console in local mode.
    if opts.local_services:
        EmailManager.SetInstance(LoggingEmailManager())
        SMSManager.SetInstance(LoggingSMSManager())
    else:
        EmailManager.SetInstance(SendGridEmailManager())
        SMSManager.SetInstance(TwilioSMSManager())

    # Set URL for local fileobjstores.
    if opts.fileobjstore:
        # Import server for ssl and port options.
        from viewfinder.backend.www import server

        scheme = 'https' if opts.ssl else 'http'
        base_url = '%s://%s:%d/fileobjstore/' % (scheme, ServerEnvironment.GetHost(), opts.port)
        # First %s is the store name (filled in below); %%s survives as the
        # placeholder handed to the store.
        url_fmt_string = base_url + '%s/%%s'
        for store_name in (ObjectStore.PHOTO, ObjectStore.USER_LOG, ObjectStore.USER_ZIPS):
            ObjectStore.GetInstance(store_name).SetUrlFmtString(url_fmt_string % store_name)

    OpManager.SetInstance(OpManager(op_map=DB_OPERATION_MAP, client=client, scan_ops=scan_ops))

    # One APNS instance per environment, all sharing the feedback handler.
    apns_feedback_handler = Device.FeedbackHandler(client)
    for environment in ('dev', 'ent', 'prod'):
        APNS.SetInstance(environment,
                         APNS(environment=environment, feedback_handler=apns_feedback_handler))

    http_client = AsyncHTTPClient()
    for environment in ('dev', 'prod'):
        ITunesStoreClient.SetInstance(
            environment, ITunesStoreClient(environment=environment, http_client=http_client))

    # Ensure that system users are loaded.
    yield LoadSystemUsers(client)

    yield gen.Task(run_callback)
def Cat(args, callback): """Cat a single file.""" assert len(args) == 1 filename = args[0] # Parse file name and extract bucket and relative path. resolved = store_utils.ParseFullPath(filename) assert resolved is not None, 'Cannot determine bucket from %s' % filename bucket, path = resolved store = ObjectStore.GetInstance(bucket) # Read file and iterate over each line. contents = yield gen.Task(store_utils.GetFileContents, store, path) print contents callback()
def Put(args, callback): """Copy a single file to a S3 directory.""" assert len(args) == 2 source = args[0] dest = args[1] res_dst = store_utils.ParseFullPath(dest) assert res_dst is not None, 'Destination not part of a registered bucket' bucket = res_dst[0] dest_dir = res_dst[1] assert dest_dir.endswith( '/'), 'Destination must be a directory (with trailing slash)' # Check existence and readability of source file. if not os.access(source, os.F_OK): print 'Source file does not exist: %s' % source callback() return if not os.access(source, os.R_OK): print 'Source file is not readable: %s' % source callback() return # Check whether the destination exists. store = ObjectStore.GetInstance(bucket) dst_file = os.path.join(dest_dir, os.path.basename(source)) exists = yield gen.Task(store_utils.FileExists, store, dst_file) if exists: answer = raw_input( 'Destination exists: %s/%s. Overwrite or skip? [o/S] ' % (bucket, dst_file)) if answer != 'o': callback() return with open(source, 'r') as f: contents = f.read() # Assume 1MB/s transfer speed. If we don't have that good a connection, we really shouldn't be uploading big files. timeout = max(20.0, len(contents) / 1024 * 1024) yield gen.Task(store.Put, dst_file, contents, request_timeout=timeout) # We just assume that no exception means no failure, which is true for now. print '%s/%s: %d bytes OK' % (bucket, dst_file, len(contents)) callback()
def _VerifyObjStoreBatches(self, exp_batches, callback):
    """Asserts that each object store holds exactly the expected batches.

    The batches are grouped by store; for each store, the number of listed
    keys must equal the number of expected batches, every batch key must be
    listed, and the stored buffer must equal the batch's buffer. Invokes
    "callback" once every store has been checked.
    """
    def _OnGetBatch(exp_batch, cb, buffer):
        self.assertEqual(exp_batch.buffer, buffer)
        cb()

    def _OnListKeys(store, batches, cb, keys):
        self.assertEqual(len(batches), len(keys))
        with util.Barrier(cb) as get_barrier:
            for batch in batches:
                self.assertIn(batch.Key(), keys)
                on_get = partial(_OnGetBatch, batch, get_barrier.Callback())
                store.Get(batch.Key(), on_get)

    batches_by_store = self._SortBatchesByStore(exp_batches)
    with util.Barrier(callback) as list_barrier:
        for store_name in batches_by_store.keys():
            store_batches = batches_by_store[store_name]
            store = ObjectStore.GetInstance(store_name)
            on_list = partial(_OnListKeys, store, store_batches, list_barrier.Callback())
            store.ListKeys(on_list)
def _RunService(callback): """Invokes user account merge utility.""" assert options.options.method, 'must specify a service method (--method)' assert options.options.user_id, 'must specify a user id (--user_id)' # Read request body from standard in. if sys.stdin.isatty(): print 'Enter JSON-encoded service request:' request_body = sys.stdin.read() # If version was not specified, add it now (use max supported version). request_dict = json.loads(request_body) if not request_dict.has_key('headers'): request_dict['headers'] = dict() if not request_dict['headers'].has_key('version'): request_dict['headers']['version'] = MAX_SUPPORTED_MESSAGE_VERSION client = DBClient.Instance() obj_store = ObjectStore.GetInstance(ObjectStore.PHOTO) def _OnService(response_dict): logging.info('result: %s' % util.ToCanonicalJSON(response_dict, indent=2)) callback() def _OnQueryUser(user): context = base.ViewfinderContext(None) context.user = user context.device_id = user.webapp_dev_id if options.options.device_id is None else options.options.device_id with stack_context.StackContext(context): ServiceHandler.InvokeService(client, obj_store, options.options.method, context.user.user_id, context.device_id, request_dict, callback=_OnService) User.Query(client, options.options.user_id, None, _OnQueryUser)
def testBadObjStore(self):
    """Tests backup storage in case the object store is down. Also verifies close() method."""
    backup_dir = tempfile.mkdtemp()
    persistor = LogBatchPersistor(backup_dir=backup_dir)
    batches = [
        LogBatch('Log batch buffer 1A', ObjectStore.SERVER_LOG, 'test1', 'keyA'),
        LogBatch('Log batch buffer 2B', ObjectStore.SERVER_LOG, 'test2', 'keyB'),
        LogBatch('Log batch buffer 3C', ObjectStore.SERVER_LOG, 'test3', 'keyC'),
        LogBatch('Log batch buffer 4D', ObjectStore.USER_LOG, 'test4', 'keyD'),
        LogBatch('Log batch buffer 5E', ObjectStore.USER_LOG, 'test5', 'keyE'),
    ]

    # Remember the working stores, then replace both with failing ones.
    good_server_store = ObjectStore.GetInstance(ObjectStore.SERVER_LOG)
    good_user_store = ObjectStore.GetInstance(ObjectStore.USER_LOG)

    bad_server_store = _BadObjectStore(ObjectStore.SERVER_LOG_BUCKET,
                                       temporary=True, fail_fast=False)
    ObjectStore.SetInstance(ObjectStore.SERVER_LOG, bad_server_store)
    bad_user_store = _BadObjectStore(ObjectStore.USER_LOG_BUCKET,
                                     temporary=True, fail_fast=False)
    ObjectStore.SetInstance(ObjectStore.USER_LOG, bad_user_store)

    # Cut the timeout allowed for flushing buffers on close to something small.
    persistor._CLOSE_TIMEOUT_SECS = 0.100

    for log_batch in batches:
        persistor.PersistLogBatch(log_batch)
    self._RunAsync(persistor.close)

    self._VerifyBackupBatches(backup_dir, batches)

    # Set a functional file object store instance and verify that a fresh
    # persistor restores the pending logs from the backup directory.
    ObjectStore.SetInstance(ObjectStore.SERVER_LOG, good_server_store)
    ObjectStore.SetInstance(ObjectStore.USER_LOG, good_user_store)
    persistor = LogBatchPersistor(backup_dir=backup_dir)
    self._RunAsync(persistor.Wait)
    self._RunAsync(self._VerifyObjStoreBatches, batches)
def RunOnce(client, job, callback):
    """Get list of files and call processing function.

    With --smart_scan, the start date is derived from the last successful
    run recorded by "job"; the run is skipped (callback(None)) when there is
    no such run or when it started less than --hours_between_runs ago.
    Per-day stats, if any, are written via logs_util.UpdateMetrics and
    "callback" receives the last day processed; otherwise it receives None.
    """
    opts = options.options
    merged_store = ObjectStore.GetInstance(logs_util.UserAnalyticsLogsPaths.MERGED_LOGS_BUCKET)

    start_date = opts.start_date
    if opts.smart_scan:
        # Search for successful full-scan run in the last week.
        last_run = yield gen.Task(job.FindLastSuccess, with_payload_key='stats.last_day')
        if last_run is None:
            logging.info('No previous successful scan found, rerun with --start_date')
            callback(None)
            return

        last_run_start = last_run['start_time']
        too_recent = util.HoursSince(last_run_start) < opts.hours_between_runs
        if too_recent:
            logging.info('Last successful run started at %s, less than %d hours ago; skipping.' %
                         (time.asctime(time.localtime(last_run_start)), opts.hours_between_runs))
            callback(None)
            return

        last_day = last_run['stats.last_day']
        # Start the scan one week before the last processed day.
        # NOTE(review): the previous comment said 30 days (to compute 30-day active users)
        # plus 3 extra days for late logs, but the code subtracts SECONDS_PER_WEEK. Confirm
        # which one is intended.
        last_day_noon = util.ISO8601ToUTCTimestamp(last_day, hour=12)
        start_time = last_day_noon - constants.SECONDS_PER_WEEK
        start_date = util.TimestampUTCToISO8601(start_time)
        logging.info('Last successful analyze_analytics run (%s) scanned up to %s, setting analysis start date to %s' %
                     (time.asctime(time.localtime(last_run_start)), last_day, start_date))

    # Fetch list of merged logs, then compute the per-day stats.
    files = yield gen.Task(GetMergedLogsFileList, merged_store, start_date)
    day_stats = yield gen.Task(ProcessFiles, merged_store, files)

    if len(day_stats) == 0:
        callback(None)
        return

    # Write per-day stats to dynamodb.
    hms = logs_util.kDailyMetricsTimeByLogType['analytics_logs']
    yield gen.Task(logs_util.UpdateMetrics, client, day_stats,
                   dry_run=opts.dry_run, hms_tuple=hms)
    last_day = sorted(day_stats.keys())[-1]
    callback(last_day)
def RunOnce(callback):
    """Get list of files and call processing function.

    Handles either the single user given by --user or every user listed in
    the client logs store, one at a time. Users sorting before --start_user
    are skipped, and at most --max_users users are handled. Invokes
    "callback" when done.
    """
    dry_run = options.options.dry_run
    client_store = ObjectStore.GetInstance(logs_util.UserAnalyticsLogsPaths.SOURCE_LOGS_BUCKET)

    if options.options.user:
        users = [options.options.user]
    else:
        users = yield gen.Task(logs_util.ListClientLogUsers, client_store)

    examined = 0
    for u in users:
        # Running all users in parallel can get us to exceed the open FD limit.
        if options.options.start_user is not None and u < options.options.start_user:
            continue
        # Stop once max_users users have been handled. This must be ">=": "examined" is only
        # incremented after the check, so ">" let one extra user through.
        if options.options.max_users is not None and examined >= options.options.max_users:
            break
        examined += 1
        yield gen.Task(HandleOneUser, client_store, u)

    if dry_run:
        logging.warning('dry_run=True: will not upload processed logs files or update registry')

    callback()
def Move(args, callback):
    """Move all files matching a pattern into a destination directory of the same bucket.

    Asks for confirmation before starting and again for every destination file
    that already exists. Each file is read from the source and written to the
    destination; with --verify the destination is read back and deleted on
    mismatch, and with --delete_source the source file is deleted afterwards.

    Args:
      args: two-element sequence: the source pattern and the destination
        directory (which must end with a slash), both as full paths.
      callback: invoked with no arguments when done or when the user declines.
    """
    assert len(args) == 2
    pattern, dest = args[0], args[1]

    parsed_src = store_utils.ParseFullPath(pattern)
    parsed_dst = store_utils.ParseFullPath(dest)
    assert parsed_src is not None and parsed_dst is not None, \
        'Source or destination not part of a registered bucket'
    assert parsed_src[0] == parsed_dst[0], 'Moving between buckets not supported'

    bucket, pattern = parsed_src
    dest_dir = parsed_dst[1]
    src_prefix = store_utils.PrefixFromPattern(pattern)
    assert dest_dir.endswith('/'), 'Destination must be a directory (with trailing slash)'
    assert not (src_prefix.startswith(dest_dir) or dest_dir.startswith(src_prefix)), \
        'Source and destination must not intersect'
    source_dir = os.path.dirname(src_prefix) + '/'

    store = ObjectStore.GetInstance(bucket)

    # List the files matching the pattern and whatever already lives in the destination.
    source_files = yield gen.Task(store_utils.ListRecursively, store, pattern)
    existing = yield gen.Task(store_utils.ListRecursively, store, dest_dir)
    dest_files = set(existing)

    if len(source_files) == 0:
        callback()
        return

    prompt = "Move %d files from %s/%s to %s/%s? [y/N] " % (
        len(source_files), bucket, source_dir, bucket, dest_dir)
    if raw_input(prompt).strip() != 'y':
        callback()
        return

    total = len(source_files)
    last_update = 0.0
    bytes_read = bytes_written = 0
    for index, src_name in enumerate(source_files):
        # Progress report at most every 10 seconds; 'index' is the number of files already visited.
        delta = time.time() - last_update
        if delta > 10.0:
            # A parenthesized single-argument print behaves exactly like the print statement.
            print('%d/%d, read %.2f KB/s, wrote %.2f KB/s' % (
                index, total, bytes_read / delta / 1024, bytes_written / delta / 1024))
            last_update = time.time()
            bytes_read = bytes_written = 0

        dst_name = dest_dir + src_name[len(source_dir):]
        if dst_name in dest_files:
            # Reset the timer so that progress is reported again right after the prompt.
            last_update = 0.0
            answer = raw_input('File exists: %s/%s. Overwrite, skip, or abort? [o/a/S] ' % (bucket, dst_name))
            if answer == 'a':
                callback()
                return
            if answer != 'o':
                continue

        # Read source file.
        contents = yield gen.Task(store.Get, src_name)
        bytes_read += len(contents)

        # Write destination file.
        yield gen.Task(store.Put, dst_name, contents)
        bytes_written += len(contents)

        if options.options.verify:
            # Read dest file back and compare; a mismatch drops the copy and keeps the source.
            dst_contents = yield gen.Task(store.Get, dst_name)
            bytes_read += len(dst_contents)
            if dst_contents != contents:
                logging.warning('Verification failed for %s/%s, deleting destination' % (bucket, dst_name))
                yield gen.Task(store.Delete, dst_name)
                continue

        if options.options.delete_source:
            # Delete original file.
            yield gen.Task(store.Delete, src_name)

    callback()
def RunOnce(client, callback):
    """Symbolicate merged user crash logs that have no '.symbol' file yet and email a summary.

    Lists all keys under 'merged_user_crashes', symbolicates every '.crash' /
    '.crash.gz' file lacking a '<name>.symbol' companion, writes the result
    back to the store (unless --dry_run) and sends one summary email covering
    both the symbolicated crashes (deduped by crashed-thread backtrace) and the
    files that failed processing.

    Args:
      client: unused by this function.
      callback: invoked with no arguments when done.
    """
    # Find all crash files.
    merged_store = ObjectStore.GetInstance(logs_util.UserCrashLogsPaths.MERGED_LOGS_BUCKET)
    files = yield gen.Task(store_utils.ListAllKeys, merged_store, prefix='merged_user_crashes')

    symbol_suffix = '.symbol'
    crash_files = set()
    symbolicated_files = set()
    for f in files:
        if f.endswith(('.crash', '.crash.gz')):
            crash_files.add(f)
        elif f.endswith(('.crash.symbol', '.crash.gz.symbol')):
            # Strip '.symbol'.
            symbolicated_files.add(f[:-len(symbol_suffix)])

    missing = crash_files.difference(symbolicated_files)

    # A sample list of files to test with. Should probably go away in favor of cmdline.
    # missing = [
    #   'merged_user_crashes/8339/2013-07-25/dev-18194-01-17-14.918-2.0.1.61.crash',
    #   'merged_user_crashes/7872/2013-07-24/dev-17246-17-46-07.277-2.0.1.61.crash',
    #   'merged_user_crashes/8246/2013-07-25/dev-17947-00-07-59.215-2.0.1.61.crash',
    #   'merged_user_crashes/8246/2013-07-25/dev-17947-00-08-17.822-2.0.1.61.crash',
    #   'merged_user_crashes/8342/2013-07-25/dev-18136-00-58-09.061-2.0.1.61.crash',
    #   'merged_user_crashes/2286/2013-07-24/dev-17163-15-19-11.778-2.1.0.70.dev.jailbroken.crash',
    #   'merged_user_crashes/8768/2013-07-25/dev-18941-06-04-18.591-2.0.2.69.crash',
    #   'merged_user_crashes/8339/2013-07-25/dev-18194-01-18-17.613-2.0.1.61.crash',
    #   'merged_user_crashes/8074/2013-07-24/dev-17645-23-19-33.700-2.0.1.61.crash',
    #   'merged_user_crashes/8320/2013-07-25/dev-18096-01-31-49.906-2.0.1.61.crash',
    #   'merged_user_crashes/8246/2013-07-25/dev-17947-00-09-11.781-2.0.1.61.crash',
    #   'merged_user_crashes/8751/2013-07-25/dev-18910-05-54-43.572-2.0.2.69.crash',
    #   'merged_user_crashes/8246/2013-07-25/dev-17947-00-09-19.120-2.0.1.61.crash',
    #   'merged_user_crashes/8341/2013-07-25/dev-18134-00-46-50.904-2.0.1.61.crash',
    #   'merged_user_crashes/8316/2013-07-25/dev-18080-02-14-49.269-2.0.1.61.crash',
    #   'merged_user_crashes/8743/2013-07-25/dev-18898-05-40-50.450-2.0.1.61.crash',
    #   'merged_user_crashes/8074/2013-07-24/dev-17645-23-12-56.844-2.0.1.61.crash',
    #   'merged_user_crashes/2286/2013-07-24/dev-17163-15-19-20.884-2.1.0.70.dev.jailbroken.crash',
    #   'merged_user_crashes/8093/2013-07-24/dev-17650-23-29-34.198-2.0.1.61.crash',
    #   'merged_user_crashes/8339/2013-07-25/dev-18194-01-17-40.123-2.0.1.61.crash',
    #   'merged_user_crashes/8074/2013-07-24/dev-17645-23-19-41.996-2.0.1.61.crash'
    # ]

    logging.info('Found %d crash logs, %d missing: %r' % (len(crash_files), len(missing), missing))
    if not missing:
        callback()
        return

    missing_crashes = sorted(missing)
    processed_crashes = {}
    failures = {}
    for f in missing_crashes:
        try:
            # We could encounter any number of failures in get, symbolicate, and put.
            contents = yield gen.Task(merged_store.Get, f)

            # Is there a single command to do this?
            lines = [l + '\n' for l in contents.split('\n')]
            sym = Symbolicator()
            sym.process_one_file(lines)

            newfile = f + '.symbol'
            if not options.options.dry_run:
                yield gen.Task(merged_store.Put, newfile, sym.FullOutput())
                logging.info('Wrote %d bytes to %s' % (len(sym.FullOutput()), newfile))

            # We write the full symbolicated file to S3, but the summary email only includes
            # the summary: preamble + crashed thread backtrace.
            out_dict = sym.OutputDict()
            out_dict['filename'] = newfile
            processed_crashes[newfile] = out_dict
        except Exception:
            # 'Exception' rather than a bare except: this try block contains yield points, and a
            # bare except would also trap GeneratorExit, KeyboardInterrupt and SystemExit.
            msg = traceback.format_exc()
            logging.error('Failed to process %s: %s' % (f, msg))
            failures[f] = msg

    logging.info('Successfully processed crashes: %r' % processed_crashes.keys())
    logging.info('Symbolicate failures: %r' % failures)

    # Generate the email. Keys for 'processed_crashes' are the final filenames (with .symbol).
    # For 'failures', keys are the non-symbolicated filenames.
    title = '%d new client crashes' % len(missing_crashes)
    # The body starts with the bare title; the failure count is only appended to the subject.
    parts = [title + '\n']

    def _S3URL(filename):
        """Return a URL for 'filename' in the merged store, valid for --s3_url_expiration_days."""
        return merged_store.GenerateUrl(
            filename,
            expires_in=constants.SECONDS_PER_DAY * options.options.s3_url_expiration_days,
            content_type='text/plain')

    if failures:
        title += ' (%d failed during processing)' % len(failures)
        parts.append('\nProcessing failures: %d\n' % len(failures))
        for f, tb in failures.iteritems():
            parts.append('--------------------------\n')
            parts.append('Non-symbolicated file: %s\n%s\n' % (_S3URL(f), tb))

    if processed_crashes:
        # Group crashes sharing the same crashed-thread backtrace.
        deduped_crashes = defaultdict(list)
        for v in processed_crashes.values():
            deduped_crashes[v['crashed_thread_backtrace']].append(v)

        parts.append('\n%d symbolicated crashes, %d after deduping\n\n' %
                     (len(processed_crashes), len(deduped_crashes)))
        for crash_set in deduped_crashes.values():
            parts.append('--------------------------\n')
            parts.append('%d crashes after deduping. Full symbolicated files:\n' % len(crash_set))
            for c in crash_set:
                parts.append('%s\n' % _S3URL(c['filename']))
            # Show the first crash of the set, as the label says (the leaked loop variable
            # used previously referred to the last one).
            first = crash_set[0]
            parts.append('\nFirst crash:\n%s%s%s\n' % (
                first['preamble'], first['crashed_thread_title'], first['crashed_thread_backtrace']))

    yield gen.Task(SendEmail, title, ''.join(parts))
    callback()
def StartServer(serve_webapp=True, serve_static_web=True, serve_admin=True):
    """Assemble the web application and start serving it.

    Builds the application settings and the handler list (common handlers
    first, then whichever of the webapp, static-web and admin handler groups
    are enabled, and finally a catch-all 404 handler), starts metrics upload
    if --upload_metrics is set, starts the main HTTP server (with SSL options
    when --ssl is set) and, with --ssl, a second server on the insecure port
    that serves the redirect handler.

    Args:
      serve_webapp: include the web application handlers.
      serve_static_web: include the static web handlers.
      serve_admin: include the admin handlers.

    Raises:
      TypeError: if an admin handler does not subclass basic_auth.BasicAuthHandler.
    """
    opts = options.options
    client = db_client.DBClient.Instance()

    settings = {
        'gzip': True,
        'login_url': '/',
        'admin_login_url': '/admin/otp',
        'domain': opts.domain,
        'server_version': opts.server_version,
        'cookie_secret': secrets.GetSecret('cookie_secret'),
        'facebook_api_key': secrets.GetSecret('facebook_api_key'),
        'facebook_secret': secrets.GetSecret('facebook_secret'),
        'google_client_id': secrets.GetSecret('google_client_id'),
        'google_client_secret': secrets.GetSecret('google_client_secret'),
        'google_client_mobile_id': secrets.GetSecret('google_client_mobile_id'),
        'google_client_mobile_secret': secrets.GetSecret('google_client_mobile_secret'),
        'template_path': ResourcesManager.Instance().template_path,
        'ui_modules': uimodules,
        'xsrf_cookies': opts.enable_xsrf,
        'debug': opts.server_debug,
        'static_path': ResourcesManager.Instance().static_path,
    }
    if opts.log_file_prefix:
        settings['logs_dir'] = os.path.dirname(opts.log_file_prefix)

    # Configure metrics uploading.
    if opts.upload_metrics:
        for interval in metric.METRIC_INTERVALS:
            metric.Metric.StartMetricUpload(client, metric.DEFAULT_CLUSTER_NAME, interval)

    # Setup application and SSL HTTP server.
    handlers = deepcopy(COMMON_HANDLERS)

    if serve_webapp:
        # Configure web application handlers.
        webapp_handlers = deepcopy(WEBAPP_HANDLERS)

        # The photo object store is exposed to handlers through the settings.
        settings['obj_store'] = ObjectStore.GetInstance(ObjectStore.PHOTO)

        # With --fileobjstore, add one file object store handler per store.
        if opts.fileobjstore:
            store_content_types = (
                (ObjectStore.PHOTO, r'image/jpeg'),
                (ObjectStore.USER_LOG, r'text/plain'),
                (ObjectStore.USER_ZIPS, r'application/zip'),
            )
            webapp_handlers.extend([
                (r'/fileobjstore/%s/(.*)' % store_name,
                 file_object_store.FileObjectStoreHandler,
                 {'storename': store_name, 'contenttype': content_type})
                for store_name, content_type in store_content_types
            ])

        if ServerEnvironment.IsDevBox():
            webapp_handlers.append(
                (r'/(link|login|register)/fakeviewfinder', auth_viewfinder.FakeAuthViewfinderHandler))
            # Set the testing directories.
            if opts.testing_path is not None:
                webapp_handlers.append((r'/testing/hook/(.*)', test_hook.TestHookHandler))
                webapp_handlers.append(
                    (r'/testing/static/(.*)', web.StaticFileHandler, {'path': '%s' % opts.testing_path}))

        handlers.extend(webapp_handlers)

    if serve_static_web:
        # Configure static web handlers.
        handlers.extend(deepcopy(STATIC_WEB_HANDLERS))

    if serve_admin:
        # Configure and verify admin handlers.
        admin_handlers = deepcopy(ADMIN_HANDLERS)
        for _, handler_cls in admin_handlers:
            if not issubclass(handler_cls, basic_auth.BasicAuthHandler):
                raise TypeError('Administration handlers must subclass BasicAuthHandler')
        handlers.extend(admin_handlers)

    # Catch-all handler for 404 pages.
    handlers.append((r'/.*', base.PageNotFoundHandler))

    # Create application and separately add handlers for the short domain and the
    # regular domain.
    #
    # Note that, although the short-domain handlers are added after the initial construction
    # of the Application, those routes will take priority over the routes in the handlers
    # array.
    application = web.Application(handlers, **settings)
    application.add_handlers(re.escape(opts.short_domain), SHORT_DOMAIN_HANDLERS)

    # Start the HTTP server; certificate and key are only looked up when SSL is enabled.
    ssl_options = None
    if opts.ssl:
        ssl_options = {
            'certfile': secrets.GetSecretFile('%s.crt' % settings['domain']),
            'keyfile': secrets.GetSecretFile('%s.key' % settings['domain']),
        }
    http_server = httpserver.HTTPServer(application, xheaders=opts.xheaders, ssl_options=ssl_options)
    with stack_context.NullContext():
        http_server.listen(opts.port)

    # Setup redirect server for HTTP -> HTTPS.
    if opts.ssl:
        http_settings = {
            'host': ServerEnvironment.GetHost(),
            'redirect_port': opts.redirect_port,
            'xheaders': opts.xheaders,
        }
        redirect_app = web.Application([(r'/(.*)', index.RedirectHandler)], **http_settings)
        redirect_server = httpserver.HTTPServer(redirect_app)
        with stack_context.NullContext():
            redirect_server.listen(opts.insecure_port)

    # Ensure that system users have been created if running with a local db (needs server to be running).
    if opts.localdb:
        yield CreateSystemUsers(client)

    # Run the server until it hits an exception or stop signal: this task's callback is
    # never invoked, so execution does not continue past this point.
    yield gen.Task(lambda callback: None)