Example #1
def merge_csv_page_selected(ui):
    csv_file_name = ui.merge_csv_file_name
    ui.merge_timestamp_combobox.clear()
    ui.merge_location_combobox.clear()
    ui.merge_tableview.setModel(None)
    if csv_file_name != '':
        ui.merge_file = csv_file_name
        try:
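            # Peek at the file first to count the columns and let csv.Sniffer
            # decide whether the first row is a header, before loading a
            # 10-row preview with pandas.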
            with open(csv_file_name, newline='') as csv_file:
                reader = csv.reader(csv_file, delimiter=',')
                columns_count = len(next(reader))
                csv_file.seek(0)
                sniffer = csv.Sniffer()
                has_header = sniffer.has_header(csv_file.read(2048))
            if not has_header:
                headers = []
                for number in range(columns_count):
                    headers.append("Column " + str(number + 1))
                df = pd.read_csv(csv_file_name, names=headers, nrows=10)
            else:
                df = pd.read_csv(csv_file_name, nrows=10)
        except Exception as e:
            error(str(e))
            return 'failure'
        header_list = list(df.columns)
        model = PandasModel(df)
        ui.merge_tableview.setModel(model)
        ui.merge_timestamp_combobox.addItems(header_list)
        ui.merge_location_combobox.addItems(header_list)
        return 'success'
    else:
        return 'failure'
Example #2
def list_all_forecast_datasets(forecast_client, dataset_group_arn):
    """Lists all datasets inside a dataset group"""
    try:
        response = forecast_client.describe_dataset_group(
            DatasetGroupArn=dataset_group_arn
        )
    except Exception as e:
        error(str(e))
        # Bail out with empty results so `response` is never used while undefined.
        return {}, {}, {}

    datasets_arns = response['DatasetArns']
    dataset_arn_dict = {}
    dataset_type_dict = {}
    dataset_status = {}
    for dataset_arn in datasets_arns:
        dataset_response = forecast_client.describe_dataset(
            DatasetArn=dataset_arn
        )
        if dataset_response['Status'] == 'ACTIVE':
            dataset_arn_dict[dataset_response['DatasetName']] = dataset_response['DatasetArn']
            dataset_type_dict[dataset_response['DatasetName']] = dataset_response['DatasetType']
            dataset_status[dataset_response['DatasetName']] = 'ACTIVE'
        elif dataset_response['Status'] == 'UPDATE_IN_PROGRESS':
            dataset_arn_dict[dataset_response['DatasetName']] = dataset_response['DatasetArn']
            dataset_type_dict[dataset_response['DatasetName']] = dataset_response['DatasetType']
            dataset_status[dataset_response['DatasetName']] = 'CREATING'
    return dataset_arn_dict, dataset_type_dict, dataset_status
def create_crawler(glue_client, role, s3_path, name):
    try:
        response = glue_client.create_crawler(
            Name=name,
            Role=role,
            DatabaseName='retail_demand_forecasting_db',
            Targets={'S3Targets': [{
                'Path': s3_path
            }]},
            SchemaChangePolicy={
                'UpdateBehavior': 'UPDATE_IN_DATABASE',
                'DeleteBehavior': 'DELETE_FROM_DATABASE'
            })
        return 'success'
    except glue_client.exceptions.AlreadyExistsException:
        try:
            response = glue_client.update_crawler(
                Name=name,
                Role=role,
                DatabaseName='retail_demand_forecasting_db',
                Targets={'S3Targets': [{
                    'Path': s3_path,
                }]},
                SchemaChangePolicy={
                    'UpdateBehavior': 'UPDATE_IN_DATABASE',
                    'DeleteBehavior': 'DELETE_FROM_DATABASE'
                })
            return 'success'
        except Exception as e:
            error(str(e))
            return 'failure'
    except Exception as e:
        error(str(e))
        return 'failure'
Example #4
def create_dataset_import_job(forecast_client, dataset_name, dataset_arn, s3_path, role_arn):
    """Creates a new dataset import job"""

    dataset_import_job_number = 1
    date = str(datetime.date(datetime.now()))
    date = date.replace('-', '')

    # Increment import job number if one already exists with this name
    while True:
        dataset_import_name = dataset_name + date + '_' + str(dataset_import_job_number)
        try:
            response = forecast_client.create_dataset_import_job(
                DatasetImportJobName=dataset_import_name,
                DatasetArn=dataset_arn,
                DataSource={
                    'S3Config': {
                        'Path': s3_path,
                        'RoleArn': role_arn,
                    }
                },
                TimestampFormat='yyyy-MM-dd HH:mm:ss'
            )
            return 'success'
        except forecast_client.exceptions.ResourceAlreadyExistsException:
            dataset_import_job_number += 1
        except Exception as e:
            error(str(e))
            return 'failure'
Example #5
def lookup_item_clicked(ui):
    # Get item id and location if enabled
    item_id = ui.forecast_view_item_id_line_edit.text()
    location = None
    if ui.use_location:
        location = ui.forecast_view_location_line_edit.text()

    # Run Query on AWS Athena with the provided item_id and location
    data_file_name, status = run_forecast_query(ui.athena_client,
                                                ui.selected_forecast,
                                                ui.s3_bucket_path, item_id,
                                                location)
    if status == 'failure':
        return status
    key = 'queries/'
    object_name = key + data_file_name
    downloading_path = os.path.join(ui.tmp_dir_name, data_file_name)
    try:
        ui.s3_client.download_file(ui.s3_bucket_name, object_name,
                                   downloading_path)
        df = pd.read_csv(downloading_path)
        ui.draw_df(df)
        return 'success'
    except Exception as e:
        error(str(e))
        return 'failure'
Example #6
def create_dataset_group(forecast_client, dataset_group_name):
    """Creates a new dataset group"""

    try:
        response = forecast_client.create_dataset_group(DatasetGroupName=dataset_group_name, Domain="RETAIL")

    except Exception as e:
        error(str(e))
def create_database(glue_client):
    try:
        response = glue_client.create_database(
            DatabaseInput={'Name': 'retail_demand_forecasting_db'})
        return 'success'
    except glue_client.exceptions.AlreadyExistsException:
        return 'success'
    except Exception as e:
        error(str(e))
        return 'failure'
Example #8
def link_dataset_to_dataset_group(forecast_client, dataset_arn_list, dataset_group_arn):
    """Connects a newly created dataset to a dataset group"""

    try:
        response = forecast_client.update_dataset_group(
            DatasetGroupArn=dataset_group_arn,
            DatasetArns=dataset_arn_list
        )
        return 'success'
    except Exception as e:
        error(str(e))
        return 'failure'
Example #9
def create_forecast(forecast_client, forecast_name, predictor_arn):
    """Creates a new forecast"""

    try:
        response = forecast_client.create_forecast(
            ForecastName=forecast_name,
            PredictorArn=predictor_arn
        )
        return response['ForecastArn'], 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'
def multi_part_upload_with_s3(s3, bucket_name, key_path, file_path, progress_bar):
    # Multipart upload
    try:
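        # multipart_threshold and multipart_chunksize are given in bytes, so
        # 1024 * 25 (~25 KB) splits all but the smallest files into parts.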
        config = TransferConfig(multipart_threshold=1024 * 25, max_concurrency=10,
                                multipart_chunksize=1024 * 25, use_threads=True)
        s3.meta.client.upload_file(file_path, bucket_name, key_path,
                                   Config=config, Callback=ProgressPercentage(file_path, progress_bar))
        return 'success'
    except ClientError as e:
        worker_error_page('Access Denied!', str(e))
        return 'failure'
    except Exception as e:
        error(str(e))
        return 'failure'
Example #11
def run_forecast_query(athena_client,
                       table_name,
                       s3_path,
                       item_id,
                       location=None):
    try:
        print(s3_path + '/queries/')
        if location is None:
            response = athena_client.start_query_execution(
                QueryString='SELECT * FROM ' + table_name +
                ' WHERE cast(item_id as varchar) = \'' + item_id + '\'',
                ResultConfiguration={'OutputLocation': s3_path + '/queries/'},
                QueryExecutionContext={
                    'Database': 'retail_demand_forecasting_db',
                    'Catalog': 'AwsDataCatalog'
                },
            )
        else:
            response = athena_client.start_query_execution(
                QueryString='SELECT * FROM ' + table_name +
                ' WHERE cast(item_id as varchar) = \'' + item_id +
                '\' AND cast(location as varchar) = \'' + location + '\'',
                ResultConfiguration={'OutputLocation': s3_path + '/queries/'},
                QueryExecutionContext={
                    'Database': 'retail_demand_forecasting_db',
                    'Catalog': 'AwsDataCatalog'
                },
            )
        execution_id = response["QueryExecutionId"]
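        # Poll the query execution every 0.5 s until it reaches a terminal
        # state (SUCCEEDED, FAILED or CANCELLED).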
        response = athena_client.get_query_execution(
            QueryExecutionId=execution_id)
        query_execution = response['QueryExecution']
        query_status = query_execution['Status']
        state = query_status['State']
        while state != 'SUCCEEDED' and state != 'FAILED' and state != 'CANCELLED':
            time.sleep(0.5)
            response = athena_client.get_query_execution(
                QueryExecutionId=execution_id)
            query_execution = response['QueryExecution']
            query_status = query_execution['Status']
            state = query_status['State']
        if state == 'SUCCEEDED':
            data_file_name = f'{execution_id}.csv'
            return data_file_name, 'success'
        else:
            raise Exception("Query Failed or Cancelled")

    except Exception as e:
        error(str(e))
        return None, 'failure'
def run_crawler(name, glue_client):
    try:
        response = glue_client.start_crawler(Name=name)
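        # Poll every 5 seconds until the crawler finishes and returns to READY.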
        time.sleep(5)
        response = glue_client.get_crawler(Name=name)
        crawler = response['Crawler']
        while crawler['State'] != 'READY':
            response = glue_client.get_crawler(Name=name)
            crawler = response['Crawler']
            time.sleep(5)
        return 'success'
    except Exception as e:
        error(str(e))
        return 'failure'
Example #13
def list_all_forecast_dataset_groups(forecast_client):
    """List all dataset groups inside AWS Forecast"""
    try:
        response = forecast_client.list_dataset_groups()
        dataset_groups = response['DatasetGroups']
        retail_dataset_groups = {}
        for dataset_group in dataset_groups:
            dataset_group_arn = dataset_group['DatasetGroupArn']
            dataset_response = forecast_client.describe_dataset_group(
                DatasetGroupArn=dataset_group_arn
            )
            if dataset_response['Domain'] == 'RETAIL':
                retail_dataset_groups[dataset_group['DatasetGroupName']] = dataset_group['DatasetGroupArn']
        return retail_dataset_groups
    except Exception as e:
        error(str(e))
        return {}
Example #14
def create_metadata_dataset(forecast_client, dataset_name, attributes):
    """Creates a new metadata dataset"""

    try:
        response = forecast_client.create_dataset(
            DatasetName=dataset_name,
            Domain='RETAIL',
            DatasetType='ITEM_METADATA',
            Schema={
                'Attributes': attributes,
            }
        )
        return response['DatasetArn'], 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'
Example #15
def is_location_in_table(athena_client, table_name):
    try:
        response = athena_client.get_table_metadata(
            CatalogName='AwsDataCatalog',
            DatabaseName='retail_demand_forecasting_db',
            TableName=table_name)
    except Exception as e:
        error(str(e))
        return None, 'failure'
    table_meta_data = response['TableMetadata']
    columns = table_meta_data['Columns']
    location_in_table = False
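    # Scan the table's columns for one named 'location'.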
    for column in columns:
        if column['Name'] == 'location':
            location_in_table = True
    return location_in_table, 'success'
def list_objects_in_dataset_bucket(session, region, bucket_name, dataset_group_name):
    try:
        s3_client = session.client(
            's3', region_name=region
        )
        response = s3_client.list_objects_v2(
            Bucket=bucket_name,
            Prefix=dataset_group_name + '/datasets/'
        )
        objects_list = []
        if 'Contents' in response:
            for dataset in response['Contents']:
                objects_list.append(dataset['Key'])
        return objects_list, 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'
Example #17
def create_related_dataset(forecast_client, dataset_name, data_frequency, attributes):
    """Creates a new related dataset"""

    try:
        response = forecast_client.create_dataset(
            DatasetName=dataset_name,
            Domain='RETAIL',
            DatasetType='RELATED_TIME_SERIES',
            DataFrequency=data_frequency,
            Schema={
                'Attributes': attributes,
            }
        )
        return response['DatasetArn'], 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'
Example #18
def create_forecast_export(forecast_client, forecast_export_name, forecast_arn, s3_path, s3_role_arn):
    """Creates a new forecast export"""

    try:
        response = forecast_client.create_forecast_export_job(
            ForecastExportJobName=forecast_export_name,
            ForecastArn=forecast_arn,
            Destination={
                'S3Config': {
                    'Path': s3_path,
                    'RoleArn': s3_role_arn,
                }
            },
        )
        return response['ForecastExportJobArn'], 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'
Example #19
def create_predictor(forecast_client, predictor_name, algorithm, auto_ml, forecast_horizon, forecast_frequency,
                     dataset_group_arn, location_in_datasets=False, holidays=None):
    """Creates a new predictor"""

    predictor_attributes = {'PredictorName': predictor_name, 'ForecastHorizon': forecast_horizon}

    if auto_ml:
        predictor_attributes['PerformAutoML'] = True
    else:
        predictor_attributes['AlgorithmArn'] = algorithm

    if holidays is None:
        predictor_attributes['InputDataConfig'] = {
            'DatasetGroupArn': dataset_group_arn
        }
    else:
        predictor_attributes['InputDataConfig'] = {
            'DatasetGroupArn': dataset_group_arn,
            'SupplementaryFeatures': [{
                'Name': 'holiday',
                'Value': holidays
            }
            ]
        }

    if location_in_datasets:
        predictor_attributes['FeaturizationConfig'] = {
            'ForecastFrequency': forecast_frequency,
            'ForecastDimensions': [
                'location',
            ]
        }
    else:
        predictor_attributes['FeaturizationConfig'] = {
            'ForecastFrequency': forecast_frequency
        }

    try:
        response = forecast_client.create_predictor(**predictor_attributes)
        return response['PredictorArn'], 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'
def upload_and_create_dataset(ui):
    if ui.current_dataset_type != 'METADATA':
        if ui.frequency == 'none':
            ui.frequency = combobox_to_freq(ui, 'DATASET')
            if ui.frequency == 'failure':
                error("Failed to identify frequency")
                return 'failure'
    change_current_page(ui, ui.upload_page)
    reset_progress_bar(ui)
    status = upload_data(ui)
    if status == 'failure':
        change_current_page(ui, ui.new_dataset_name_page)
        return status
    status = create_dataset(ui)
    if status == 'failure':
        change_current_page(ui, ui.new_dataset_name_page)
        return status
    ui.select_ds_group_pb.click()
    return 'success'
Example #21
def is_location_in_datasets(forecast_client, dataset_group_arn):
    """Checks if related any dataset has 'location' as a parameter"""
    try:
        response = forecast_client.describe_dataset_group(
            DatasetGroupArn=dataset_group_arn
        )
        datasets_arns = response['DatasetArns']
        for dataset_arn in datasets_arns:
            dataset_response = forecast_client.describe_dataset(
                DatasetArn=dataset_arn
            )
            schema = dataset_response['Schema']
            attributes = schema['Attributes']
            for attribute in attributes:
                if attribute['AttributeName'] == 'location':
                    return True, 'success'
        return False, 'success'
    except Exception as e:
        error(str(e))
        return False, 'failure'
def merge_button_clicked(ui):
    save_location = ui.tmp_dir_name
    csv_file_name = ui.merge_file
    related_location_name = ui.related_location_name
    related_date_name = ui.related_date_name
    original_df = ui.dataset['RELATED']
    additional_location_name = ui.merge_location_combobox.currentText()
    additional_date_name = ui.merge_timestamp_combobox.currentText()
    try:
        with open(csv_file_name, newline='') as csv_file:
            reader = csv.reader(csv_file, delimiter=',')
            columns_count = len(next(reader))
            csv_file.seek(0)
            sniffer = csv.Sniffer()
            has_header = sniffer.has_header(csv_file.read(2048))
        if not has_header:
            headers = []
            for number in range(columns_count):
                headers.append("Column " + str(number + 1))
            df = pd.read_csv(csv_file_name, names=headers)
        else:
            df = pd.read_csv(csv_file_name)
        convert_date(df, additional_date_name)
        convert_date(original_df, related_date_name)
        result = pd.merge(
            original_df,
            df,
            left_on=[related_date_name, related_location_name],
            right_on=[additional_date_name, additional_location_name],
            how='left')
        result.to_csv(os.path.join(save_location, "merged.csv"),
                      date_format='%Y-%m-%d %H:%M:%S',
                      index=False)

        select_csv_file(ui,
                        'RELATED',
                        merge_file=os.path.join(save_location, "merged.csv"))
        return 'success'

    except Exception as e:
        error(str(e))
        return 'failure'
def populate_dataset_view(ui, dataset_type):
    try:
        csv_file_name = ui.selected_csv[dataset_type]
        with open(csv_file_name, newline='') as csv_file:
            reader = csv.reader(csv_file, delimiter=',')
            columns_count = len(next(reader))
            csv_file.seek(0)
            sniffer = csv.Sniffer()
            has_header = sniffer.has_header(csv_file.read(2048))
        if not has_header:
            headers = []
            for number in range(columns_count):
                headers.append("Column " + str(number + 1))
            df = pd.read_csv(csv_file_name, parse_dates=True, names=headers)
        else:
            df = pd.read_csv(csv_file_name, parse_dates=True)
    except Exception as e:
        error(str(e))
        return
    if df.shape[0] == 0 or df.shape[1] == 0:
        error("Can't import an empty dataset")
        return
    header_list = list(df.columns)
    view_df = df.head(10)
    ui.dataset[dataset_type] = df
    model = PandasModel(view_df)
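    # Reset the preview table and repopulate the column-mapping combo boxes
    # for the selected dataset type.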
    if dataset_type == 'TARGET':
        ui.target_dataset_preview_tableview.setModel(None)
        ui.target_item_id_combobox.clear()
        ui.target_timestamp_combobox.clear()
        ui.target_demand_combobox.clear()
        ui.target_location_combobox.clear()
        ui.target_dataset_preview_tableview.setModel(model)
        ui.target_item_id_combobox.addItems(header_list)
        ui.target_timestamp_combobox.addItems(header_list)
        ui.target_demand_combobox.addItems(header_list)
        ui.target_location_combobox.addItem('')
        ui.target_location_combobox.addItems(header_list)
    elif dataset_type == 'RELATED':
        ui.related_dataset_preview_tableview.setModel(None)
        ui.related_item_id_combobox.clear()
        ui.related_timestamp_combobox.clear()
        ui.related_price_combobox.clear()
        ui.related_weather_combobox.clear()
        ui.related_promotion_combobox.clear()
        ui.related_location_combobox.clear()
        ui.related_dataset_preview_tableview.setModel(model)
        ui.related_item_id_combobox.addItems(header_list)
        ui.related_timestamp_combobox.addItems(header_list)
        ui.related_price_combobox.addItem('')
        ui.related_weather_combobox.addItem('')
        ui.related_location_combobox.addItem('')
        ui.related_promotion_combobox.addItem('')
        ui.related_price_combobox.addItems(header_list)
        ui.related_weather_combobox.addItems(header_list)
        ui.related_location_combobox.addItems(header_list)
        ui.related_promotion_combobox.addItems(header_list)
    else:
        ui.metadata_dataset_preview_tableview.setModel(None)
        ui.metadata_item_id_combobox.clear()
        ui.metadata_color_combobox.clear()
        ui.metadata_category_combobox.clear()
        ui.metadata_brand_combobox.clear()
        ui.metadata_dataset_preview_tableview.setModel(model)
        ui.metadata_item_id_combobox.addItems(header_list)
        ui.metadata_color_combobox.addItem('')
        ui.metadata_brand_combobox.addItem('')
        ui.metadata_category_combobox.addItem('')
        ui.metadata_color_combobox.addItems(header_list)
        ui.metadata_brand_combobox.addItems(header_list)
        ui.metadata_category_combobox.addItems(header_list)
def prepare_data(ui, dataset_type):
    ui.remote_datasets = list_all_datasets_names(ui.forecast_client)
    tmp_dir_name = ui.tmp_dir_name
    df = ui.dataset[dataset_type]
    if dataset_type == 'TARGET':
        item_id_name = str(ui.target_item_id_combobox.currentText())
        timestamp_name = str(ui.target_timestamp_combobox.currentText())
        demand_name = str(ui.target_demand_combobox.currentText())
        location_name = str(ui.target_location_combobox.currentText())

        attributes = [{
            "AttributeName": "timestamp",
            "AttributeType": "timestamp"
        }, {
            "AttributeName": "item_id",
            "AttributeType": "string"
        }, {
            "AttributeName": "demand",
            "AttributeType": "float"
        }]
        if location_name != '':
            attributes.append({
                "AttributeName": "location",
                "AttributeType": "string"
            })
            df = df.reindex(columns=[
                timestamp_name, item_id_name, demand_name, location_name
            ])
        else:
            df = df.reindex(
                columns=[timestamp_name, item_id_name, demand_name])
        ui.dataset_attributes[dataset_type] = attributes
        if df[timestamp_name].dtype == 'object':
            try:
                df[timestamp_name] = pd.to_datetime(df[timestamp_name])
            except Exception as e:
                error(
                    "Can't convert timestamp to datetime, please check your data. "
                    + str(e))
                return 'failure'
        else:
            error(
                "Can't convert timestamp to datetime, please check your data.")
            return 'failure'
        if not df[timestamp_name].is_monotonic_increasing:
            df = df.sort_values(by=timestamp_name)
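        # Infer the dataset frequency from the unique gaps between consecutive
        # timestamps.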
        diff_values = np.unique(np.diff(df[timestamp_name].values)).tolist()
        frequency = detect_frequency(diff_values)
        if frequency == 'none':
            output = set_enable_freq_input(ui, True)
        else:
            output = set_enable_freq_input(ui, False, frequency)
        if output == 'failure':
            raise Exception("Failed to set enable frequency inputs.")
        ui.frequency = frequency
        df.to_csv(os.path.join(tmp_dir_name, "tmp_dataset.csv"),
                  date_format='%Y-%m-%d %H:%M:%S',
                  header=False,
                  index=False)
        return 'success'
    elif dataset_type == 'RELATED':
        item_id_name = str(ui.related_item_id_combobox.currentText())
        timestamp_name = str(ui.related_timestamp_combobox.currentText())
        weather_name = str(ui.related_weather_combobox.currentText())
        price_name = str(ui.related_price_combobox.currentText())
        promotion_name = str(ui.related_promotion_combobox.currentText())
        location_name = str(ui.related_location_combobox.currentText())
        columns = [timestamp_name, item_id_name]
        attributes = [{
            "AttributeName": "timestamp",
            "AttributeType": "timestamp"
        }, {
            "AttributeName": "item_id",
            "AttributeType": "string"
        }]
        if location_name != '':
            attributes.append({
                "AttributeName": "location",
                "AttributeType": "string"
            })
            columns.append(location_name)
        if weather_name != '':
            attributes.append({
                "AttributeName": "weather",
                "AttributeType": "float"
            })
            columns.append(weather_name)
        if price_name != '':
            attributes.append({
                "AttributeName": "price",
                "AttributeType": "float"
            })
            columns.append(price_name)
        if promotion_name != '':
            attributes.append({
                "AttributeName": "price",
                "AttributeType": "float"
            })
            columns.append(promotion_name)
        df = df.reindex(columns=columns)
        if promotion_name != '':
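            # Normalize boolean-like promotion values to 0/1.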
            df[promotion_name] = df[promotion_name].replace({
                True: 1,
                False: 0,
                'True': 1,
                'False': 0,
                'true': 1,
                'false': 0,
                1.0: 1,
                0.0: 0,
                '1.0': 1,
                '0.0': 0,
                '1': 1,
                '0': 0
            })
        ui.dataset_attributes[dataset_type] = attributes
        if df[timestamp_name].dtype == 'object':
            try:
                df[timestamp_name] = pd.to_datetime(df[timestamp_name])
            except Exception as e:
                error(
                    "Can't convert timestamp to datetime, please check your data. "
                    + str(e))
                return 'failure'
        else:
            error(
                "Can't convert timestamp to datetime, please check your data.")
            return 'failure'
        if not df[timestamp_name].is_monotonic_increasing:
            df = df.sort_values(by=timestamp_name)
        diff_values = np.unique(np.diff(df[timestamp_name].values)).tolist()
        frequency = detect_frequency(diff_values)
        if frequency == 'none':
            output = set_enable_freq_input(ui, True)
        else:
            output = set_enable_freq_input(ui, False, frequency)
        if output == 'failure':
            raise Exception("Failed to enable frequency inputs.")
        ui.frequency = frequency
        ui.dataset[dataset_type] = df
        df.to_csv(os.path.join(tmp_dir_name, "tmp_dataset.csv"),
                  date_format='%Y-%m-%d %H:%M:%S',
                  header=False,
                  index=False)
        return 'success'
    else:
        item_id_name = str(ui.metadata_item_id_combobox.currentText())
        color_name = str(ui.metadata_color_combobox.currentText())
        category_name = str(ui.metadata_category_combobox.currentText())
        brand_name = str(ui.metadata_brand_combobox.currentText())
        columns = [item_id_name]
        attributes = [{"AttributeName": "item_id", "AttributeType": "string"}]
        if color_name != '':
            attributes.append({
                "AttributeName": "color",
                "AttributeType": "string"
            })
            columns.append(color_name)
        if category_name != '':
            attributes.append({
                "AttributeName": "category",
                "AttributeType": "string"
            })
            columns.append(category_name)
        if brand_name != '':
            attributes.append({
                "AttributeName": "brand",
                "AttributeType": "string"
            })
            columns.append(brand_name)

        set_enable_freq_input(ui, False)
        df = df.reindex(columns=columns)
        ui.dataset_attributes[dataset_type] = attributes
        df.to_csv(os.path.join(tmp_dir_name, "tmp_dataset.csv"),
                  header=False,
                  index=False)
        return 'success'
Example #25
def create_predictor_and_forecast(ui):
    try:
        # Using caffeine to prevent pc from sleeping.
        caffeine.on(display=False)

        forecast_export_name = forecast_name = predictor_name = ui.new_forecast_line_edit.text(
        )
        forecast_horizon = ui.predictor_forecast_horizon_spinbox.value()
        forecast_client = ui.forecast_client
        dataset_group_arn = ui.dataset_group_arn
        auto_ml = ui.auto_ml_checkbox.isChecked()

        # Check if a country is selected to use its holidays for forecasting.
        holidays = None
        if ui.country_checkbox.isChecked():
            holidays = selected_country(ui)

        # Check if location is a dimension in the data
        location_in_datasets, status = is_location_in_datasets(
            forecast_client, dataset_group_arn)
        if status == 'failure':
            caffeine.off()
            return status

        # Check if auto machine learning is selected
        algorithm = None
        if not auto_ml:
            algorithm = ui.algorithms[ui.algorithm_combobox.currentText()]
            print(algorithm)

        # Get frequency from combobox
        forecast_frequency = combobox_to_freq(ui, 'FORECAST')

        # Create a predictor
        predictor_arn, status = create_predictor(
            forecast_client, predictor_name, algorithm, auto_ml,
            forecast_horizon, forecast_frequency, dataset_group_arn,
            location_in_datasets, holidays)
        if status == 'failure':
            caffeine.off()
            return status

        # Wait until predictor is created
        response = forecast_client.describe_predictor(
            PredictorArn=predictor_arn)
        while response['Status'] != 'ACTIVE' and response[
                'Status'] != 'CREATE_FAILED':
            response = forecast_client.describe_predictor(
                PredictorArn=predictor_arn)
            time.sleep(10)
        if response['Status'] == 'CREATE_FAILED':
            caffeine.off()
            return 'failure'

        # Create a forecast
        forecast_arn, status = create_forecast(forecast_client, forecast_name,
                                               predictor_arn)
        if status == 'failure':
            caffeine.off()
            return status

        # Wait until forecast is created
        response = forecast_client.describe_forecast(ForecastArn=forecast_arn)
        while response['Status'] != 'ACTIVE' and response[
                'Status'] != 'CREATE_FAILED':
            response = forecast_client.describe_forecast(
                ForecastArn=forecast_arn)
            time.sleep(10)
        if response['Status'] == 'CREATE_FAILED':
            caffeine.off()
            return 'failure'
        s3_path = ui.s3_bucket_path + "/" + ui.dataset_group_name + "/forecast/" + forecast_name + "/"
        s3_role_arn = ui.s3_role_arn

        # Create a forecast export
        forecast_export_arn, status = create_forecast_export(
            ui.forecast_client, forecast_export_name, forecast_arn, s3_path,
            s3_role_arn)
        if status == 'failure':
            caffeine.off()
            return status

        # Wait until forecast export is created
        response = forecast_client.describe_forecast_export_job(
            ForecastExportJobArn=forecast_export_arn)
        while response['Status'] != 'ACTIVE' and response[
                'Status'] != 'CREATE_FAILED':
            response = forecast_client.describe_forecast_export_job(
                ForecastExportJobArn=forecast_export_arn)
            time.sleep(10)
        if response['Status'] == 'CREATE_FAILED':
            caffeine.off()
            return 'failure'

        # Wait 30 seconds to make sure exported files show on S3 (this should be immediate)
        time.sleep(30)

        # Create a database in AWS Glue
        status = create_database(ui.glue_client)
        if status == 'failure':
            caffeine.off()
            return status

        # Create a crawler in AWS Glue
        status = create_crawler(ui.glue_client, s3_role_arn, s3_path,
                                forecast_name)
        if status == 'failure':
            caffeine.off()
            return status

        # Run the created crawler
        status = run_crawler(forecast_name, ui.glue_client)
        if status == 'failure':
            caffeine.off()
            return status

        caffeine.off()
        return 'success'
    except Exception as e:
        caffeine.off()
        error(str(e))
        return 'failure'
import tempfile
from botocore.exceptions import ClientError
import sys
from PyQt5 import QtWidgets, QtCore
from src.gui_connector_utils import buttons_connector
from gui.gui import Ui_main_window
from gui.error_display_functions import error, access_error
import darkdetect
from src.shared_utils import gui_initializer, dark_mode

with tempfile.TemporaryDirectory() as tmp_dir_name:
    try:
        app = QtWidgets.QApplication(sys.argv)
        main_window = QtWidgets.QWidget()
        ui = Ui_main_window()
        gui_initializer(ui, main_window, tmp_dir_name)

        # If dark mode is detected, use the dark logo
        if darkdetect.isDark():
            dark_mode(ui)
        main_window.show()
        buttons_connector(ui)
    except ClientError as e:
        access_error(str(e))
    except Exception as e:
        error(str(e))
    sys.exit(app.exec_())
Example #27
def list_all_forecast_forecasts(forecast_client, dataset_group_arn, s3_path):
    """List all forecasts inside a dataset group"""
    try:
        forecast_set = set()
        predictor_set = set()
        forecast_exports_set = set()
        response = forecast_client.list_forecasts(
            Filters=[
                {
                    'Key': 'DatasetGroupArn',
                    'Value': dataset_group_arn,
                    'Condition': 'IS'
                },
                {
                    'Key': 'Status',
                    'Value': 'ACTIVE',
                    'Condition': 'IS'
                }
            ]
        )
        forecasts_arn_dict = {}
        forecasts = response['Forecasts']
        for forecast in forecasts:
            forecast_set.add(forecast['ForecastName'])
            forecasts_arn_dict[forecast['ForecastName']] = forecast['ForecastArn']
        response = forecast_client.list_predictors(
            Filters=[
                {
                    'Key': 'DatasetGroupArn',
                    'Value': dataset_group_arn,
                    'Condition': 'IS'
                },
                {
                    'Key': 'Status',
                    'Value': 'ACTIVE',
                    'Condition': 'IS'
                }
            ]
        )
        predictors = response['Predictors']
        for predictor in predictors:
            predictor_set.add(predictor['PredictorName'])

        for forecast_name, forecast_arn in forecasts_arn_dict.items():
            response = forecast_client.list_forecast_export_jobs(
                Filters=[
                    {
                        'Key': 'ForecastArn',
                        'Value': forecast_arn,
                        'Condition': 'IS'
                    },
                    {
                        'Key': 'Status',
                        'Value': 'ACTIVE',
                        'Condition': 'IS'
                    }
                ]
            )
            forecast_exports = response['ForecastExportJobs']
            for forecast_export in forecast_exports:
                destination = forecast_export['Destination']
                s3_config = destination['S3Config']
                path = s3_config['Path']
                if s3_path in path:
                    forecast_exports_set.add(forecast_export['ForecastExportJobName'])
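        # Only names that exist as a forecast, a predictor, and an export job
        # under this S3 path are returned (all three are created with the same
        # name elsewhere in this tool).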
        intersection_set = forecast_exports_set.intersection(forecast_set)
        intersection_set = intersection_set.intersection(predictor_set)
        return list(intersection_set), 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'