Example #1
    def drop_temp_hive_table(self):
        command = None
        drop_table_stmt = None
        try:
            drop_table_stmt = "DROP TABLE IF EXISTS %s_airflow_temp;" % self.table_name

            command = HiveCommand.run(query=drop_table_stmt, label='default')
            self.monitor_command(command, drop_table_stmt)
        except Exception as e:
            if command is None:
                raise AirflowException(
                    'drop_temp_hive_table call for %s failed. No command Id available.\n%s'
                    % (drop_table_stmt, e))
            else:
                raise AirflowException(
                    'drop_temp_hive_table call for %s failed. https://api.qubole.com/v2/analyze?command_id=%s\n%s'
                    % (drop_table_stmt, command.id, e))

        try:
            command = None  # reset so the except block reports the right command
            stmt = "s3cmd -c /usr/lib/hustler/s3cfg rm -rf s3://%s/result_cache/%s_airflow_temp/;" % \
                   (self.s3_bucket, self.table_name)

            command = Command.run(command_type='ShellCommand',
                                  inline=stmt,
                                  label='default')
            self.monitor_command(command, stmt)
        except Exception as e:
            if command is None:
                raise AirflowException(
                    'drop_temp_hive_table call for %s failed. No command Id available.\n%s'
                    % (stmt, e))
            else:
                raise AirflowException(
                    'drop_temp_hive_table call for %s failed. https://api.qubole.com/v2/analyze?command_id=%s\n%s'
                    % (stmt, command.id, e))
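Several of these operator methods delegate to self.monitor_command, which is not part of the excerpt. A minimal sketch, assuming qds_sdk's Command.is_done/Command.is_success status helpers and Command.find; the polling interval and error wording are illustrative:

import time

from airflow.exceptions import AirflowException
from qds_sdk.commands import Command

class OperatorSketch(object):  # stand-in for the operator class used above
    def monitor_command(self, command, query):
        # Poll the Qubole API until the command reaches a terminal state.
        while not Command.is_done(command.status):
            time.sleep(30)                       # illustrative interval
            command = Command.find(command.id)   # refresh from the API
        # Terminal but unsuccessful states (error/cancelled) raise here.
        if not Command.is_success(command.status):
            raise AirflowException(
                'Command %s failed for query:\n%s' % (command.id, query))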
Example #2
def upgrade(context):
    logging.debug("Execute upgrade of `%d`" % version)

    template = Template("""
        CREATE TABLE IF NOT EXISTS qubole_bi_${env}_${account_id}.tco_table (
                    node_id BIGINT,
                    cluster_inst_id INT,
                    cluster_id INT,
                    instance_type STRING,
                    availability_zone STRING,
                    region STRING,
                    hour BIGINT,
                    price DOUBLE,
                    node_run_time INT,
                    approx_per_hour_price DOUBLE,
                    ondemand_price DOUBLE,
                    up_time STRING,
                    down_time STRING,
                    node_type STRING
                )
                PARTITIONED BY (account_id INT, event_date STRING)
                STORED AS ORC
                LOCATION '${defloc}/qubole_bi/tco_table/'
    """)
    context["revisions.upgraded"].append(version)
    Migrations.upgrade(
        migration_number=version,
        command=HiveCommand.run(
            query=template.render_unicode(env=context["env"],
                                          account_id=context['account_id'],
                                          defloc=context['defloc'])))
    print(context)
Example #3
def downgrade(context):
    logging.debug("Execute downgrade of `%d`" % version)
    template = Template("""
        DROP TABLE IF EXISTS qubole_bi_${env}_${account_id}.clusters_metadata2;
    """)
    Migrations.downgrade(
        migration_number=version,
        command=HiveCommand.run(
            query=template.render_unicode(env=context["env"],
                                          account_id=context['account_id'])))
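These upgrade/downgrade scripts reference names defined outside the excerpt: a module-level version, a Migrations registry, and a Template class whose ${...} placeholders and render_unicode() call match Mako's API. A plausible preamble, with the Migrations import and the version value as hypothetical stand-ins:

import logging

from mako.template import Template      # ${...} placeholders + render_unicode()
from qds_sdk.commands import HiveCommand

from migrations import Migrations       # hypothetical: the project's registry

version = 3                             # hypothetical: this script's revision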
Example #4
def upgrade(context):
    logging.debug("Execute upgrade of `%d`" % version)

    template = Template("""
        CREATE DATABASE IF NOT EXISTS qubole_bi_${env}_${account_id};
    """)
    context["revisions.upgraded"].append(version)
    Migrations.upgrade(
        migration_number=version,
        command=HiveCommand.run(query=template.render_unicode(
            env=context["env"], account_id=context['account_id'])))
    print(context)
Example #5
def qubole(api_token, sql, replacements, filename):
    Qubole.configure(api_token=api_token)
    with open(sql,'r') as f:
        query = f.read()
        
    label='Trading-spark'
    query = find_replace_multi(query,replacements)
    hc = HiveCommand.run(query=query, label=label)
    cmd = Command.find(hc.id)
    out_file = filename + '.csv'
    
    with open(out_file, 'wb') as writer:
        cmd.get_results(writer)

    df = pd.read_csv(out_file, delimiter='\t')

    return df
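find_replace_multi is not defined in the excerpt; below is a minimal sketch consistent with how it is called, followed by an illustrative invocation (the token, file path, and placeholder map are made up):

def find_replace_multi(text, replacements):
    # Apply each placeholder -> value substitution in turn.
    for old, new in replacements.items():
        text = text.replace(old, new)
    return text

df = qubole(api_token='YOUR_API_TOKEN',
            sql='queries/daily_summary.sql',
            replacements={'${run_date}': '2021-01-01'},
            filename='daily_summary')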
Example #6
    def copy_to_temp_hive(self, columns):
        command = None
        copy_table_stmt = None
        try:
            copy_table_stmt = '''SET hive.exec.compress.output=false; 
                INSERT INTO %s_airflow_temp select * from %s''' % (
                self.table_name, self.table_name)

            command = HiveCommand.run(query=copy_table_stmt, label='default')
            self.monitor_command(command, copy_table_stmt)
        except Exception as e:
            if command is None:
                raise AirflowException(
                    'RS_copy_to_temp_hive call for %s failed. No command Id available. \n%s'
                    % (copy_table_stmt, e))
            else:
                raise AirflowException(
                    'RS_copy_to_temp_hive call for %s failed. https://api.qubole.com/v2/analyze?command_id=%s\n%s'
                    % (copy_table_stmt, command.id, e))
Example #7
def upgrade(context):
    logging.debug("Execute upgrade of `%d`" % version)

    template = Template("""
        CREATE EXTERNAL TABLE IF NOT EXISTS qubole_bi_${env}_${account_id}.clusters_metadata2 (
                    cluster_id STRING,
                    cluster_inst_id STRING,
                    tag STRING,
                    ec2_tag STRING
                )
                PARTITIONED BY (account_id INT, event_date STRING)
                STORED AS ORC
                LOCATION '${defloc}/qubole_bi/clusters_metadata2/'
    """)
    context["revisions.upgraded"].append(version)
    Migrations.upgrade(
        migration_number=version,
        command=HiveCommand.run(
            query=template.render_unicode(env=context["env"],
                                          account_id=context['account_id'],
                                          defloc=context['defloc'])))
    print(context)
Example #8
def upgrade(context):
    logging.debug("Execute upgrade of `%d`" % version)

    template = Template("""
        CREATE EXTERNAL TABLE IF NOT EXISTS qubole_bi_${env}_${account_id}.queries_info (
                    command_id STRING,
                    engine STRING,
                    similarity_identifier STRING,
                    similarity_payload MAP<STRING, STRING>,
                    counters MAP<STRING, STRING>,
                    cluster_id INT)
              PARTITIONED BY (account_id INT, event_date STRING)
              STORED AS ORC
              LOCATION '${defloc}/qubole_bi/queries_info/'
    """)
    context["revisions.upgraded"].append(version)
    Migrations.upgrade(
        migration_number=version,
        command=HiveCommand.run(
            query=template.render_unicode(env=context["env"],
                                          account_id=context['account_id'],
                                          defloc=context['defloc'])))
    print(context)
Example #9
    def create_temp_hive_table(self, columns):
        command = None
        create_table_stmt = None
        try:
            create_table_stmt = """
            SET hive.exec.compress.output=false;

            DROP TABLE IF EXISTS %s_airflow_temp;  

            CREATE EXTERNAL TABLE %s_airflow_temp(
            %s)
            COMMENT 'The table %s_airflow_temp was generated by Hadoop Result Caching'
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY ','
            LINES TERMINATED BY '\\n'
            STORED AS TEXTFILE
            LOCATION 's3n://%s/result_cache/%s_airflow_temp/'
            TBLPROPERTIES("orc.compress"="NONE");""" % \
                                (self.table_name,
                                 self.table_name,
                                 ',\n'.join(['%s     %s' % (c[0], c[1]) for c in columns]),
                                 self.table_name,
                                 self.s3_bucket,
                                 self.table_name
                                 )

            command = HiveCommand.run(query=create_table_stmt, label='default')
            self.monitor_command(command, create_table_stmt)
        except Exception as e:
            if command is None:
                raise AirflowException(
                    'RS_create_temp_hive_table call for %s failed. No command Id available. \n %s'
                    % (create_table_stmt, e))
            else:
                raise AirflowException(
                    'RS_create_temp_hive_table call for %s failed. https://api.qubole.com/v2/analyze?command_id=%s \n %s'
                    % (create_table_stmt, command.id, e))
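The columns argument is a list of (name, type) pairs, as produced by get_table_columns() in Example #11. An illustrative input; with self.table_name = 'events', the rendered DDL creates events_airflow_temp as a comma-delimited TEXTFILE over s3n://<bucket>/result_cache/events_airflow_temp/:

columns = [('user_id', 'BIGINT'), ('event_name', 'STRING')]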
Example #10
    def run_sql(self):
        command = None
        sql_stmt = None
        try:
            template = Template(self.raw_sql)
            dwd = DataWarehouseDates()
            sql_stmt = template.render(
                DS=self.ds,
                DS_TODAY=dwd.ds_today(self.ds),
                DS_DATE_ID=dwd.date_id_from_date_str(self.ds),
                DS_DATE_ADD=lambda days: dwd.date_add(self.ds, days),
                DS_TODAY_DATE_ADD=lambda days: dwd.date_add(
                    dwd.ds_today(self.ds), days))
            sql_stmt_with_tracking = '-- %s %s %s\n%s' % (
                self.dag_id, self.task_id, self.ds, sql_stmt)
            qubole_name = '%s_%s_%s' % (self.dag_id, self.task_id, self.ds)
            if 'PRESTO' in self.db_type.upper():
                command = PrestoCommand.create(query=sql_stmt_with_tracking,
                                               label=self.label,
                                               name=qubole_name)
            elif 'HIVE' in self.db_type.upper():
                command = HiveCommand.run(query=sql_stmt_with_tracking,
                                          label=self.label,
                                          name=qubole_name)
            else:
                raise AirflowException('Invalid db_type specified.')

            self.monitor_command(command, sql_stmt_with_tracking)
        except Exception as e:
            if command is None:
                raise AirflowException(
                    'run_sql call for %s failed. No command Id available.\n%s'
                    % (sql_stmt, e))
            else:
                raise AirflowException(
                    'run_sql call for %s failed. https://api.qubole.com/v2/analyze?command_id=%s\n%s'
                    % (sql_stmt, command.id, e))
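DataWarehouseDates is not included in the excerpt. A hypothetical sketch consistent with how run_sql calls it (the real semantics, especially of ds_today, may differ):

from datetime import datetime, timedelta

class DataWarehouseDates(object):
    def date_add(self, ds, days):
        # 'YYYY-MM-DD' plus a (possibly negative) number of days.
        d = datetime.strptime(ds, '%Y-%m-%d') + timedelta(days=days)
        return d.strftime('%Y-%m-%d')

    def ds_today(self, ds):
        # Assumption: Airflow's ds marks the start of the schedule
        # interval, so "today" is taken to be the following day.
        return self.date_add(ds, 1)

    def date_id_from_date_str(self, ds):
        # '2021-01-01' -> 20210101
        return int(ds.replace('-', ''))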
Example #11
    def get_table_columns(self):
        command = None
        sql = None
        try:
            sql = "show create table %s" % self.table_name
            command = HiveCommand.run(query=sql, label='default')
            buffer = io.BytesIO()
            command.get_results(fp=buffer)
            buffer.seek(0)

            buffer_list = [t for t in buffer]

            buffer.close()
            buffer = None

            column_names_types = []
            for l in buffer_list:
                decoded = l.decode("utf-8")
                column_type = decoded.split()
                if len(column_type) > 1:
                    if self.is_type(column_type[1].strip().strip(',').strip(
                            '(').strip(')')):
                        column_names_types.append(
                            (column_type[0].strip(),
                             column_type[1].strip(',').strip('(').strip(')')))

            return column_names_types

        except Exception as e:
            if command is None:
                raise AirflowException(
                    'RS_get_table_columns call for %s failed. No command Id available.\n%s'
                    % (sql, e))
            else:
                raise AirflowException(
                    'RS_get_table_columns call for %s failed. https://api.qubole.com/v2/analyze?command_id=%s\n%s'
                    % (sql, command.id, e))
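The self.is_type check used above is not shown; since the caller has already stripped trailing commas and parentheses, a plausible sketch is a membership test against known Hive column types (the list below is illustrative, not exhaustive):

HIVE_TYPES = {'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE',
              'DECIMAL', 'STRING', 'VARCHAR', 'CHAR', 'BOOLEAN', 'BINARY',
              'TIMESTAMP', 'DATE'}

def is_type(token):
    # The operator's self.is_type would delegate to a check like this.
    return token.upper() in HIVE_TYPES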
Example #12
def upgrade(context):
    logging.debug("Execute upgrade of `%d`" % version)

    template = Template("""
        CREATE EXTERNAL TABLE IF NOT EXISTS qubole_bi_${env}_${account_id}.`usage`(
                `cluster_id` INT,
                `cluster_inst_id` INT,
                `node_type` STRING,
                `cluster_type` STRING,
                `compute_usage` DECIMAL(30,8))
            PARTITIONED BY (
               `account_id` INT,
               `event_date` STRING)
            STORED AS ORC
            LOCATION '${defloc}/qubole_bi/qcuh/'
    """)
    context["revisions.upgraded"].append(version)
    Migrations.upgrade(
        migration_number=version,
        command=HiveCommand.run(
            query=template.render_unicode(env=context["env"],
                                          account_id=context['account_id'],
                                          defloc=context['defloc'])))
    print(context)
Example #13
def setup():
    """
    Execute a test query and return the HiveCommand object
    """
    hc = HiveCommand.run(query=TEST_QUERY)
    return hc
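The surrounding test module (TEST_QUERY and API configuration) is outside the excerpt; an illustrative setup, with the token and query as placeholders:

from qds_sdk.qubole import Qubole
from qds_sdk.commands import HiveCommand

Qubole.configure(api_token='YOUR_API_TOKEN')   # placeholder token
TEST_QUERY = "SHOW TABLES;"                    # illustrative test query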