def __init__(self, credentials=None):
    """Sets up the Projects object and the Api instance it talks through.

    Args:
      credentials: the credentials for the account. When None, they are
          obtained from _utils.get_credentials().
    """
    creds = _utils.get_credentials() if credentials is None else credentials
    self._api = _api.Api(creds)
def __init__(self, job_id, context):
    """Creates a Job bound to a BigQuery job ID and a context.

    Args:
      job_id: the BigQuery job ID corresponding to this job; it is forwarded
          to the base class constructor.
      context: a Context object providing project_id and credentials.
    """
    super(Job, self).__init__(job_id)
    # Start and end times begin unset.
    self._start_time = self._end_time = None
    self._context = context
    self._api = _api.Api(context)
def __init__(self, project_id=None, context=None):
    """Creates a BucketList for a project.

    Args:
      project_id: an optional project whose buckets we want to manipulate.
          If not given, the project id of the Api object is used.
      context: an optional Context object providing project_id and
          credentials. If omitted, the globally configured default context
          is used.
    """
    ctx = gcp.context.Context.default() if context is None else context
    self._context = ctx
    self._api = _api.Api(ctx)
    # Any falsy project_id (None, '') falls back to the Api's project.
    self._project_id = project_id or self._api.project_id
def __init__(self, project_id=None, context=None):
    """Creates the Datasets object for a project.

    Args:
      project_id: the ID of the project whose datasets you want to list.
          If not given, the project id of the Api object is used.
      context: an optional Context object providing project_id and
          credentials. If omitted, the globally configured default context
          is used.
    """
    ctx = datalab.context.Context.default() if context is None else context
    self._context = ctx
    self._api = _api.Api(ctx)
    # Any falsy project_id (None, '') falls back to the Api's project.
    self._project_id = project_id or self._api.project_id
def __init__(self, name, info=None, context=None):
    """Creates a Bucket object for the named bucket.

    Args:
      name: the name of the bucket.
      info: the information about the bucket, if already available.
      context: an optional Context object providing project_id and
          credentials. If omitted, the globally configured default context
          is used.
    """
    self._name = name
    self._info = info
    ctx = gcp.context.Context.default() if context is None else context
    self._context = ctx
    self._api = _api.Api(ctx)
def __init__(self, bucket, key, info=None, context=None):
    """Creates an Item identified by its bucket and key.

    Args:
      bucket: the name of the bucket containing the item.
      key: the key of the item.
      info: the information about the item, if already available.
      context: an optional Context object providing project_id and
          credentials. If omitted, the globally configured default context
          is used.
    """
    self._bucket = bucket
    self._key = key
    self._info = info
    ctx = gcp.Context.default() if context is None else context
    self._context = ctx
    self._api = _api.Api(ctx)
def __init__(self, bucket, prefix, delimiter, context=None):
    """Creates an ItemList over the items of a bucket.

    Args:
      bucket: the name of the bucket containing the items.
      prefix: an optional prefix to match items.
      delimiter: an optional string to simulate directory-like semantics.
          Items whose names do not contain the delimiter after the prefix
          are returned as-is; the remaining names are returned truncated
          after the delimiter, with duplicates removed (i.e. as
          pseudo-directories).
      context: an optional Context object providing project_id and
          credentials. If omitted, the globally configured default context
          is used.
    """
    self._bucket = bucket
    self._prefix = prefix
    self._delimiter = delimiter
    ctx = gcp.Context.default() if context is None else context
    self._context = ctx
    self._api = _api.Api(ctx)
def __init__(self, name, context=None):
    """Creates a Table object; the underlying table need not exist yet.

    Args:
      name: the name of the table either as a string or a 3-part tuple
          (projectid, datasetid, name). If a string, it must have the form
          '<project>:<dataset>.<table>' or '<dataset>.<table>'.
      context: an optional Context object providing project_id and
          credentials. If omitted, the globally configured default context
          is used.

    Raises:
      Exception if the name is invalid.
    """
    ctx = gcp.Context.default() if context is None else context
    self._context = ctx
    self._api = _api.Api(ctx)

    # The Api's project id is handed to the parser along with the name.
    parts = _utils.parse_table_name(name, self._api.project_id)
    self._name_parts = parts
    # The format string consumes four values from the parsed name.
    self._full_name = '%s:%s.%s%s' % parts

    self._info = None
    self._cached_page = None
    self._cached_page_index = 0
def __init__(self, name, context=None):
    """Creates a DataSet object and tries to load its info eagerly.

    Args:
      name: the name of the dataset, as a string or
          (project_id, dataset_id) tuple.
      context: an optional Context object providing project_id and
          credentials. If omitted, the globally configured default context
          is used.

    Raises:
      Exception if the name is invalid.
    """
    ctx = gcp.Context.default() if context is None else context
    self._context = ctx
    self._api = _api.Api(ctx)

    parts = _utils.parse_dataset_name(name, self._api.project_id)
    self._name_parts = parts
    self._full_name = '%s:%s' % parts

    # Info starts as None and stays None when the lookup request fails;
    # a RequestException here is deliberately not propagated.
    self._info = None
    try:
        self._info = self._get_info()
    except gcp._util.RequestException:
        pass
def __init__(self, sql, context=None, values=None, udfs=None,
             data_sources=None, **kwargs):
    """Builds a Query from SQL, wiring in UDF calls and external tables.

    The SQL is expanded once here, then tokenized and scanned. A name that
    follows FROM, is immediately followed by '(' and matches a known UDF is
    rewritten into a sub-select over the UDF's output fields. A name that
    follows FROM or JOIN and refers to a FederatedTable in the expansion
    values is registered as an external table.

    Args:
      sql: the BigQuery SQL query string to execute, or a SqlStatement
          object. The latter will have any variable references replaced
          before being associated with the Query (i.e. once constructed the
          SQL associated with a Query is static). A query string may also
          contain variable references, provided the variables are passed as
          keyword arguments to this constructor.
      context: an optional Context object providing project_id and
          credentials. If omitted, the globally configured default context
          is used.
      values: a dictionary used to expand variables if passed a SqlStatement
          or a string with variable references.
      udfs: array of UDFs referenced in the SQL.
      data_sources: dictionary of federated (external) tables referenced in
          the SQL. When supplied, federated tables discovered in the
          expansion values are added to this same dictionary.
      kwargs: arguments to use when expanding the variables if passed a
          SqlStatement or a string with variable references. Either values
          or kwargs may be used, but not both.

    Raises:
      Exception if expansion of any variables failed, or if a referenced
      external table has no known schema.
    """
    ctx = gcp.Context.default() if context is None else context
    self._context = ctx
    self._api = _api.Api(ctx)

    # The instance keeps the arguments exactly as passed (possibly None).
    self._data_sources = data_sources
    self._udfs = udfs

    if data_sources is None:
        data_sources = {}

    self._results = None
    self._code = None
    self._imports = []
    if values is None:
        values = kwargs

    self._sql = gcp.data.SqlModule.expand(sql, values, udfs)

    # Sets are used so that the same UDF code is never included twice.
    known_udfs = set(udfs or [])
    known_udfs.update(
        value for value in values.values() if isinstance(value, _udf.UDF))
    emitted = set()

    tokens = gcp.data.tokenize(self._sql)
    udf_by_name = {udf.name: udf for udf in known_udfs}

    for i, token in enumerate(tokens):
        # Locate the nearest non-whitespace token on either side; a token
        # with no such neighbour on one side is skipped.
        before = i - 1
        while before >= 0 and tokens[before].isspace():
            before -= 1
        if before < 0:
            continue
        after = i + 1
        while after < len(tokens) and tokens[after].isspace():
            after += 1
        if after >= len(tokens):
            continue

        keyword = tokens[before].upper()
        if keyword not in ('FROM', 'JOIN'):
            continue

        # Check for external tables: a name not yet registered whose
        # expansion value is a FederatedTable.
        if (tokens[after] not in "[('\"" and
                token not in data_sources and
                values and token in values and
                isinstance(values[token], _federated_table.FederatedTable)):
            data_sources[token] = values[token]

        # Now check for UDF calls, i.e. a 'FROM token (' sequence.
        if keyword != 'FROM' or tokens[after] != '(':
            continue
        if token not in udf_by_name:
            continue

        udf = udf_by_name[token]
        if token not in emitted:
            # First use of this UDF: record its code and imports once.
            emitted.add(token)
            if self._code is None:
                self._code = []
            self._code.append(udf.code)
            if udf.imports:
                self._imports.extend(udf.imports)

        fields = ', '.join(output[0] for output in udf._outputs)
        tokens[i] = '(SELECT %s FROM %s' % (fields, token)

        # The rewrite opened one extra parenthesis, so find the ')' that
        # balances the call and double it.
        depth = 0
        for j in range(i + 1, len(tokens)):
            piece = tokens[j]
            if piece == '(':
                depth += 1
            elif piece == ')':
                depth -= 1
                if depth == 0:
                    tokens[j] = '))'
                    break

    self._external_tables = None
    if data_sources:
        self._external_tables = {}
        for table_name, table in data_sources.items():
            if table.schema is None:
                raise Exception(
                    'Referenced external table %s has no known schema' % table_name)
            self._external_tables[table_name] = table._to_query_json()

    self._sql = ''.join(tokens)
def _create_api(self, context):
    """Builds the Api object for the given context.

    Args:
      context: the object handed to the _api.Api constructor.

    Returns:
      A new _api.Api instance constructed from context.
    """
    api = _api.Api(context)
    return api