def __init__(self, mode, source):
    """
    Initialise the metrics worker thread and record build information.

    The thread is registered under the name 'metrics-worker'. The version
    string is read from ``CONFIG.version``; a trailing '.gpu' marks a GPU
    build and is removed from the stored version.

    Parameters
    ----------
    mode
        Stored unchanged on ``self._mode``.
    source
        Stored unchanged on ``self._source``; ``_track`` forwards it with
        every submitted metric.
    """
    threading.Thread.__init__(self, name='metrics-worker')

    if CONFIG.version.endswith('.gpu'):
        # Keep only the part in front of the GPU marker.
        self._version = CONFIG.version.split('.gpu')[0]
        self._isgpu = True
    else:
        self._version = CONFIG.version
        self._isgpu = False

    self._mode = mode
    self._source = source

    # Best effort: a failed lookup must not prevent the worker from being
    # created. ``_track`` retries the lookup when the key is still unset.
    # `except Exception:` is valid on both Python 2 and Python 3, unlike the
    # previous `except Exception, e:` form.
    try:
        self._product_key = product_key.get_product_key()
    except Exception:
        self._product_key = None
def _track(self, event_name, value=1, type="gauge", properties=None, meta=None, send_sys_info=False):
    """
    Internal method to actually send metrics, expected to be called from
    background thread only.

    The event is pushed to three sinks in turn (librato, mixpanel and the
    cloudfront metrics URL). Each push is best effort: a failure in one sink
    is swallowed so that it can neither affect the other sinks nor the caller.

    Parameters
    ----------
    event_name : str
        Name of the event. When ``value != 1`` it is replaced by the result
        of ``self._bucketize_mixpanel`` before the mixpanel and cloudfront
        submissions.
    value
        Value reported with the event (default 1).
    type : str
        NOTE(review): currently unused -- the librato submission always
        sends "gauge". Confirm whether this parameter should be forwarded.
    properties : dict, optional
        Extra properties attached to the event. Not modified.
    meta : dict, optional
        Passed to mixpanel as ``meta`` and merged into the cloudfront
        properties. Not modified.
    send_sys_info : bool
        When true, ``self._sys_info`` (populated on demand through
        ``self._set_sys_info``) is merged into the properties.

    Returns
    -------
    None
    """
    if not self._usable:
        return

    # None defaults replace the shared mutable `{}` defaults.
    if properties is None:
        properties = {}
    if meta is None:
        meta = {}

    the_properties = {}
    if send_sys_info:
        if not self._sys_info_set:
            self._set_sys_info()
        the_properties.update(self._sys_info)
    the_properties.update(properties)

    try:
        # librato
        self._tracker.submit(name=event_name, value=value, type="gauge",
                             source=self._source, attributes=the_properties)
    except Exception:
        # Metrics are best effort; never let a sink failure propagate.
        pass

    try:
        # since mixpanel cannot send sizes or numbers, just tracks events, bucketize these here
        if value != 1:
            event_name = self._bucketize_mixpanel(event_name, value)
            the_properties['value'] = value

        # mixpanel
        the_properties['source'] = self._source
        self._mixpanel.track(self._distinct_id, event_name, properties=the_properties, meta=meta)
    except Exception:
        pass

    try:
        # homebrew metrics - cloudfront
        if self._metrics_url != '':
            props = _copy.deepcopy(the_properties)
            props.update(meta)

            cloudfront_props = {}
            cloudfront_props['event_name'] = event_name
            cloudfront_props['value'] = value
            cloudfront_props['distinct_id'] = self._distinct_id
            cloudfront_props['version'] = self._version
            cloudfront_props['isgpu'] = self._isgpu
            cloudfront_props['properties'] = _urllib.quote_plus(str(props))

            # if product key is not set, then try to get it now when submitting
            if not self._product_key:
                try:
                    self._product_key = product_key.get_product_key()
                except Exception:
                    self._product_key = 'Unknown'
            cloudfront_props['product_key'] = self._product_key

            # self.logger.debug("SENDING '%s' to %s" % (cloudfront_props, self._metrics_url))
            self._requests.get(self._metrics_url, params=cloudfront_props)
    except Exception:
        pass
def _ec2_factory(instance_type, region=None, availability_zone=None, CIDR_rule=None,
                 security_group_name=None, tags=None, user_data=None, credentials=None,
                 ami_service_parameters=None, num_hosts=1, additional_port_to_open=None,
                 product_type=None, subnet_id=None, security_group_id=None):
    '''
    This function does everything necessary to bring up EC2 host(s): create a security group (if
    necessary), determine arguments to start up the EC2 instance (i.e. AMI id and user data),
    actually start up the EC2 instance, wait for it, and applies AWS tags.

    Parameters
    ----------
    instance_type : EC2 instance type to launch.
    region : AWS region. When None the region is read from the config file, falling back to
        'us-west-2'. Must be one of VALID_REGIONS.
    availability_zone : passed to EC2 as the instance placement.
    CIDR_rule, security_group_name, additional_port_to_open, security_group_id :
        forwarded to _setup_security_group.
    tags : dict of AWS tags applied to every launched instance (a caller-supplied dict gets the
        Turi Distributed marker added in place when product_type is TuriDistributed).
    user_data : dict sent to the instance as JSON user data. A caller-supplied dict is updated in
        place with the AMI service response, the product key, the hash key and the os_url; when
        omitted a fresh dict is used for each call.
    credentials : keyword arguments for the boto connection.
    ami_service_parameters : extra query parameters appended to the AMI lookup request.
    num_hosts : number of instances to launch.
    product_type : forwarded to _setup_security_group; also selects the Turi Distributed tag.
    subnet_id : forwarded to _setup_security_group, whose returned subnet id is used for launch.

    Returns
    -------
    [instance, security_group, subnet_id] when num_hosts == 1 (kept for backward compatibility),
    otherwise [list_of_instances, security_group, subnet_id].

    Raises
    ------
    AssertionError : the engine is not connected.
    LicenseValidationException : the AMI service reported an error.
    Exception : invalid region, AMI lookup failure, or any failure while launching (in which case
        already launched instances are stopped first).
    '''
    from graphlab.connect.main import get_unity, is_connected, ENGINE_START_ERROR_MESSAGE
    from graphlab.product_key import get_product_key

    # Create fresh containers per call. `user_data` is mutated below, so a shared `{}` default
    # would leak one launch's data (product key, hash key, ...) into the next.
    if user_data is None:
        user_data = {}
    if credentials is None:
        credentials = {}
    if ami_service_parameters is None:
        ami_service_parameters = {}

    # Before launching EC2 instances we want to make sure the product key is valid. So make sure
    # the server has started.
    get_unity()
    if not is_connected():
        # Raised explicitly instead of via `assert` so the check is not stripped by `python -O`.
        raise AssertionError(ENGINE_START_ERROR_MESSAGE)
    product_key = get_product_key()

    # Set default values for parameters.
    if region is None:
        region = _get_region_from_config()
        if region is None:
            region = 'us-west-2'
        else:
            __LOGGER__.info('Read region from config file.')

    if region not in VALID_REGIONS:
        raise Exception("%s is not a valid region." % region)

    security_group, subnet_id = _setup_security_group(region = region, CIDR_rule = CIDR_rule,
        security_group_name = security_group_name, credentials = credentials,
        additional_port_to_open = additional_port_to_open, product_type = product_type,
        subnet_id = subnet_id, security_group_id = security_group_id)

    if ('GRAPHLAB_TEST_AMI_ID' in os.environ and 'GRAPHLAB_TEST_ENGINE_URL' in os.environ
            and 'GRAPHLAB_TEST_HASH_KEY' in os.environ):
        # unit-test mode, don't involve webservice to retrieve AMI, instead use environment variables
        ami_id = os.environ['GRAPHLAB_TEST_AMI_ID']
        engine_url = os.environ['GRAPHLAB_TEST_ENGINE_URL']
        __LOGGER__.info("UNIT mode, using AMI: '%s' and engine url: '%s' when launching EC2 instance."
                        % (ami_id, engine_url))
        json_blob = {
            'ami_id': ami_id,
            'engine_url': engine_url,
            'hash_key': os.environ['GRAPHLAB_TEST_HASH_KEY'],
        }
    else:
        # Get the info to start a EC2 from the GraphLab Server
        json_blob_path = JSON_BLOB_PATH_FORMAT % (instance_type, graphlab.version, region, product_key)
        for (param_name, param_value) in ami_service_parameters.items():
            json_blob_path += "&%s=%s" % (str(param_name), str(param_value))
        json_blob_url = config.graphlab_server + json_blob_path

        try:
            # set specific timeout for this web service request, lots of time spent in SSL negotiation
            # for staging server allows a little more time
            timeout_in_seconds = 10 if config.mode == 'PROD' else 60
            graphlab_server_response = urlopen(json_blob_url, timeout=timeout_in_seconds)
            json_blob = json.loads(graphlab_server_response.read().decode('utf-8'))
        except Exception:
            # `except Exception` (not a bare except) so KeyboardInterrupt / SystemExit propagate.
            raise Exception('Unable to successfully retrieve correct EC2 image to launch for this '
                            'version. This could be a temporary problem. Please try again in a few '
                            'minutes.')
        __LOGGER__.debug("web service return: %s" % json_blob)

        if json_blob.get('error'):
            raise LicenseValidationException(json_blob.get('error'))

        if 'ami_id' not in json_blob or json_blob['ami_id'] is None:
            raise Exception("Unable to successfully retrieve correct EC2 image to launch. Please try "
                            "again later. Error received:'%s'" % json_blob.get('message'))
        ami_id = json_blob['ami_id']

    # Add json_blob to user_data and set the product key and hash key
    user_data.update(json_blob)
    user_data['product_key'] = product_key
    user_data['hash_key'] = json_blob.get('hash_key', 'NO_HASH_VALUE')

    # Check for empty os_url
    if user_data.get('os_url') is None or len(user_data.get('os_url')) == 0:
        user_data['os_url'] = 'NO_OS_URL'

    # Check for testing override of os_url param.
    if 'GRAPHLAB_TEST_OS_URL' in os.environ:
        user_data['os_url'] = os.environ['GRAPHLAB_TEST_OS_URL']

    run_instances_args = {
        'security_group_ids' : [ security_group.id ],
        'user_data' : json.dumps(user_data),
        'instance_type' : instance_type,
        'placement' : availability_zone,
        'subnet_id' : subnet_id
    }

    if num_hosts != 1:
        run_instances_args['min_count'] = num_hosts
        run_instances_args['max_count'] = num_hosts

    if 'GRAPHLAB_TEST_EC2_KEYPAIR' in os.environ:
        keypair = os.environ['GRAPHLAB_TEST_EC2_KEYPAIR']
        __LOGGER__.info("Using keypair: '%s' when launching EC2 instance" % (keypair))
        run_instances_args['key_name'] = keypair

    run_instances_args['block_device_map'] = get_block_device_mapping(instance_type)

    # Actually launch the EC2 instance(s) and wait for them to start running.
    instances = None
    try:
        conn = boto.vpc.connect_to_region(region, **credentials)
        response = conn.run_instances(ami_id, **run_instances_args)
        instances = response.instances
        if len(response.instances) != num_hosts:
            raise Exception("requested %s instance(s) but %s were launched"
                            % (num_hosts, len(response.instances)))

        # Report
        for i in instances:
            __LOGGER__.info("Launching an %s instance in the %s availability zone, with id: %s."
                            " You will be responsible for the cost of this instance."
                            % (i.instance_type, i.placement, i.id))

        # Wait for all host(s) to say they're done starting up.
        while True:
            try:
                for i in instances:
                    # Rarely an instance can reach a temp state before going into pending. We check
                    # for 'running' right away to make unit tests work.
                    while i.update() not in ('pending', 'running', 'failed'):
                        time.sleep(1)
                    while i.update() == 'pending':
                        time.sleep(1)
                    if i.update() == 'failed':
                        raise RuntimeError("Instance %s startup failed" % i.id)
                break
            except EC2ResponseError as e:
                # EC2 is eventually consistent so sometimes it complains that it
                # cannot find the instance, in that case, we will retry
                __LOGGER__.debug("Ignoring EC2ResponseError: %s" % e.message)
                # Pause before retrying instead of hammering the EC2 API in a tight loop.
                time.sleep(1)

        # Add tags to this instance(s).
        if tags is None:
            tags = {}
        if product_type is _ProductType.TuriDistributed:
            security_group_default_name = TURI_DISTRIBUTED_NAME
            tags[security_group_default_name] = ''
        for i in instances:
            conn.create_tags(i.id, tags)

        results = [_Ec2Instance(i.ip_address, i.private_ip_address, i.id, i, region)
                   for i in instances]

        if num_hosts == 1:
            # for backward compatibility
            return [results[0], security_group, subnet_id]
        return [results, security_group, subnet_id]

    except Exception as e:
        # Do not leave half-started (billable) instances behind.
        if instances:
            _stop_instances([i.id for i in instances], region)
        raise Exception("Unable to launch EC2 instance: '%s'. Please check AWS Console to make"
                        " sure any EC2 instances launched have been terminated." % e)