예제 #1
0
    def __init__(self, credentials=None):
        """Initialize the Projects object.

        Args:
          credentials: the credentials for the account. When None, the value
              returned by _utils.get_credentials() is used instead.
        """
        resolved = _utils.get_credentials() if credentials is None else credentials
        self._api = _api.Api(resolved)
예제 #2
0
  def __init__(self, job_id, context):
    """Creates a Job bound to a BigQuery job ID.

    Args:
      job_id: the BigQuery job ID corresponding to this job.
      context: a Context object providing project_id and credentials.
    """
    super(Job, self).__init__(job_id)
    self._context = context
    self._api = _api.Api(context)
    # Both timestamps start out unset.
    self._start_time = self._end_time = None
예제 #3
0
    def __init__(self, project_id=None, context=None):
        """Sets up a BucketList for a project.

        Args:
          project_id: an optional project whose buckets we want to manipulate.
              If None (or otherwise falsy) the project id of the api object
              is used.
          context: an optional Context object providing project_id and
              credentials. If omitted, gcp.context.Context.default() is used.
        """
        ctx = gcp.context.Context.default() if context is None else context
        self._context = ctx
        self._api = _api.Api(ctx)
        # Any falsy project_id, not only None, falls back to the api's project.
        self._project_id = project_id or self._api.project_id
예제 #4
0
    def __init__(self, project_id=None, context=None):
        """Initialize the Datasets object.

        Args:
          project_id: the ID of the project whose datasets you want to list.
              If None (or otherwise falsy) the project id of the api object
              is used.
          context: an optional Context object providing project_id and
              credentials. If omitted, datalab.context.Context.default() is
              used.
        """
        ctx = datalab.context.Context.default() if context is None else context
        self._context = ctx
        self._api = _api.Api(ctx)
        # Any falsy project_id, not only None, falls back to the api's project.
        self._project_id = project_id or self._api.project_id
예제 #5
0
    def __init__(self, name, info=None, context=None):
        """Creates a Bucket object for the named bucket.

        Args:
          name: the name of the bucket.
          info: the information about the bucket if available.
          context: an optional Context object providing project_id and
              credentials. If omitted, gcp.context.Context.default() is used.
        """
        ctx = gcp.context.Context.default() if context is None else context
        self._context = ctx
        self._api = _api.Api(ctx)
        self._name, self._info = name, info
예제 #6
0
파일: _item.py 프로젝트: initaldk/datalab
    def __init__(self, bucket, key, info=None, context=None):
        """Creates an Item identified by its bucket and key.

        Args:
          bucket: the name of the bucket containing the item.
          key: the key of the item.
          info: the information about the item if available.
          context: an optional Context object providing project_id and
              credentials. If omitted, gcp.Context.default() is used.
        """
        ctx = gcp.Context.default() if context is None else context
        self._context = ctx
        self._api = _api.Api(ctx)
        self._bucket, self._key, self._info = bucket, key, info
예제 #7
0
파일: _item.py 프로젝트: initaldk/datalab
    def __init__(self, bucket, prefix, delimiter, context=None):
        """Creates an ItemList over the items of a bucket.

        Args:
          bucket: the name of the bucket containing the items.
          prefix: an optional prefix to match items.
          delimiter: an optional string to simulate directory-like semantics.
              The returned items will be those whose names do not contain the
              delimiter after the prefix. For the remaining items, the names
              will be returned truncated after the delimiter with duplicates
              removed (i.e. as pseudo-directories).
          context: an optional Context object providing project_id and
              credentials. If omitted, gcp.Context.default() is used.
        """
        ctx = gcp.Context.default() if context is None else context
        self._context = ctx
        self._api = _api.Api(ctx)
        self._bucket, self._prefix, self._delimiter = bucket, prefix, delimiter
예제 #8
0
  def __init__(self, name, context=None):
    """Creates a Table object; the underlying table need not exist yet.

    Args:
      name: the name of the table either as a string or a 3-part tuple
        (projectid, datasetid, name). If a string, it must have the form
        '<project>:<dataset>.<table>' or '<dataset>.<table>'.
      context: an optional Context object providing project_id and credentials.
        If omitted, gcp.Context.default() is used.
    Raises:
      Exception if the name is invalid.
    """
    ctx = gcp.Context.default() if context is None else context
    self._context = ctx
    self._api = _api.Api(ctx)
    # The api's project id is handed to the parser along with the name.
    parts = _utils.parse_table_name(name, self._api.project_id)
    self._name_parts = parts
    self._full_name = '%s:%s.%s%s' % parts
    # Nothing has been fetched or cached yet.
    self._info = self._cached_page = None
    self._cached_page_index = 0
예제 #9
0
  def __init__(self, name, context=None):
    """Creates a DataSet and eagerly tries to load its metadata.

    Args:
      name: the name of the dataset, as a string or (project_id, dataset_id)
          tuple.
      context: an optional Context object providing project_id and credentials.
          If omitted, gcp.Context.default() is used.
    Raises:
      Exception if the name is invalid.
    """
    ctx = gcp.Context.default() if context is None else context
    self._context = ctx
    self._api = _api.Api(ctx)
    parts = _utils.parse_dataset_name(name, self._api.project_id)
    self._name_parts = parts
    self._full_name = '%s:%s' % parts
    # _info must exist before _get_info() runs; it stays None when the lookup
    # fails with a RequestException (best-effort fetch).
    self._info = None
    try:
      fetched = self._get_info()
    except gcp._util.RequestException:
      pass
    else:
      self._info = fetched
예제 #10
0
파일: _query.py 프로젝트: vasbala/datalab
    def __init__(self,
                 sql,
                 context=None,
                 values=None,
                 udfs=None,
                 data_sources=None,
                 **kwargs):
        """Initializes an instance of a Query object.

        Args:
          sql: the BigQuery SQL query string to execute, or a SqlStatement
              object. The latter will have any variable references replaced
              before being associated with the Query (i.e. once constructed
              the SQL associated with a Query is static).

              It is possible to have variable references in a query string too
              provided the variables are passed as keyword arguments to this
              constructor.
          context: an optional Context object providing project_id and
              credentials. If omitted, gcp.Context.default() is used.
          values: a dictionary used to expand variables if passed a
              SqlStatement or a string with variable references.
          udfs: array of UDFs referenced in the SQL.
          data_sources: dictionary of federated (external) tables referenced
              in the SQL.
          kwargs: arguments to use when expanding the variables if passed a
              SqlStatement or a string with variable references. Only
              consulted when values is None.

          Note that either values or kwargs may be used, but not both.

        Raises:
          Exception if expansion of any variables failed, or if a referenced
              external table has no known schema.
        """
        if context is None:
            context = gcp.Context.default()
        self._context = context
        self._api = _api.Api(context)
        # The arguments are stored exactly as given (possibly None); only the
        # local names are normalized below.
        self._data_sources = data_sources
        self._udfs = udfs

        # NOTE: a dict supplied by the caller is the same object as
        # self._data_sources and is extended in place when federated tables
        # are discovered in `values` further down.
        if data_sources is None:
            data_sources = {}

        self._results = None
        self._code = None
        self._imports = []
        if values is None:
            values = kwargs

        self._sql = gcp.data.SqlModule.expand(sql, values, udfs)

        # We need to take care not to include the same UDF code twice so we use sets.
        udfs = set(udfs or [])
        udfs.update(v for v in values.values() if isinstance(v, _udf.UDF))
        included_udfs = set()

        tokens = gcp.data.tokenize(self._sql)
        udf_dict = {udf.name: udf for udf in udfs}
        # Tokens are only ever replaced in place, so the count never changes.
        num_tokens = len(tokens)

        for i, token in enumerate(tokens):
            # Find the preceding and following non-whitespace tokens.
            prior = i - 1
            while prior >= 0 and tokens[prior].isspace():
                prior -= 1
            if prior < 0:
                continue
            following = i + 1
            while following < num_tokens and tokens[following].isspace():
                following += 1
            if following >= num_tokens:
                continue

            uprior = tokens[prior].upper()
            if uprior not in ('FROM', 'JOIN'):
                continue
            next_token = tokens[following]

            # Check for external tables: a name after FROM/JOIN that is bound
            # in `values` to a FederatedTable and not already registered.
            if (next_token not in "[('\""
                    and token not in data_sources
                    and values and token in values
                    and isinstance(values[token],
                                   _federated_table.FederatedTable)):
                data_sources[token] = values[token]

            # Now check for UDF calls.
            if uprior != 'FROM' or next_token != '(':
                continue

            # We have a 'FROM token (' sequence.
            if token not in udf_dict:
                continue

            udf = udf_dict[token]
            if token not in included_udfs:
                # First use of this UDF: record its code and imports once.
                included_udfs.add(token)
                if self._code is None:
                    self._code = []
                self._code.append(udf.code)
                if udf.imports:
                    self._imports.extend(udf.imports)

            fields = ', '.join(f[0] for f in udf._outputs)
            tokens[i] = '(SELECT %s FROM %s' % (fields, token)

            # Find the closing parenthesis and add the additional one now needed.
            depth = 0
            for j in range(i + 1, num_tokens):
                if tokens[j] == '(':
                    depth += 1
                elif tokens[j] == ')':
                    depth -= 1
                    if depth == 0:
                        tokens[j] = '))'
                        break

        self._external_tables = None
        if data_sources:
            self._external_tables = {}
            for name, table in data_sources.items():
                if table.schema is None:
                    raise Exception(
                        'Referenced external table %s has no known schema' %
                        name)
                self._external_tables[name] = table._to_query_json()

        self._sql = ''.join(tokens)
예제 #11
0
 def _create_api(self, context):
     """Builds the Api wrapper for the given context.

     Args:
       context: passed straight through to _api.Api.

     Returns:
       The result of _api.Api(context).
     """
     api = _api.Api(context)
     return api