Python sql.dataframe.DataFrame.toPandas Examples

Programming Language: Python

Namespace/Package Name: pyspark

Method/Function: toPandas

Examples at hotexamples.com: 3

Python sql.dataframe.DataFrame.toPandas - 3 examples found. These are the top rated real world Python examples of pyspark.sql.dataframe.DataFrame.toPandas extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

withColumn(10)

select(9)

join(8)

filter(6)

count(5)

withColumnRenamed(4)

toPandas(3)

agg(2)

groupBy(2)

createOrReplaceTempView(2)

groupby(2)

cache(1)

limit(1)

query(1)

repartition(1)

fillna(1)

drop(1)

Example #1

Show file

def Group_and_save_atLeastOneMonth_SKU(
        unchanged_SKU: pyspark.sql.dataframe.DataFrame,
        changed_SKU: pyspark.sql.dataframe.DataFrame):
    """
    Separate unadjusted SKU to three sheets within same excel file: Capacity_to_avg_qty<3, 
                                             Capacity_to_avg_qty<9 and Capacity_to_avg_qty>=3,
                                             Capacity_to_avg_qty>=9
                                            
    """
    # Separate SKU and save to excel files.
    changed_SKU.toPandas().to_csv(
        '../data/Output/atLeastOneMonth/adjusted_SKU.csv',
        index=False,
        encoding='utf-8')
    print(
        "Save adjusted SKU(atLeastOneMonth) to Output/atLeastOneMonth/adjusted_SKU.csv"
    )

    unchanged_SKU = unchanged_SKU.toPandas()
    unchanged_SKU1 = unchanged_SKU.query('Capacity_to_avg_qty<3')
    unchanged_SKU2 = unchanged_SKU.query(
        'Capacity_to_avg_qty<9 and Capacity_to_avg_qty>=3')
    unchanged_SKU3 = unchanged_SKU.query('Capacity_to_avg_qty>=9')
    writer = ExcelWriter('../data/Output/atLeastOneMonth/unadjusted_SKU.xlsx')
    unchanged_SKU1.to_excel(writer, 'lessThan3', index=False)
    unchanged_SKU2.to_excel(writer, 'between3And9', index=False)
    unchanged_SKU3.to_excel(writer, 'moreThan9', index=False)
    writer.save()
    print(
        "Save unadjusted SKU(atLeastOneMonth) to Output/atLeastOneMonth/unadjusted_SKU.xlsx"
    )

Example #2

Show file

def find_and_analysis_fullMonth_SKU(
        df_atLeastOneMonth: pyspark.sql.dataframe.DataFrame, split_month: int,
        spark) -> pyspark.sql.dataframe.DataFrame:
    """
    Find SKU, which "soldQty < capacity" in every month
    """
    full_month_items = select_full_month_item(
        df_atLeastOneMonth.toPandas(),
        month_list=[split_month, split_month + 1,
                    split_month + 2])  # three month data since split_month
    full_month_SKU_info = get_full_month_SKU_info(
        full_month_items, df_atLeastOneMonth.select(selected_column_fullMonth),
        spark).dropDuplicates()

    return full_month_SKU_info

Example #3

Show file

File: OOS_helperfun.py Project: YiranJing/Lagardere_CommercialAnalysis

def check_OOS_by_rules(
        df: pyspark.sql.dataframe.DataFrame, date_generated: list,
        spark: pyspark.sql.session.SparkSession
) -> pandas.core.frame.DataFrame:
    """
    Set rules for OOS items:
        Rule 1. if 0 sales all the time, no OOS. (did in clean data)
        Rule 2. if 0 stocks all the time, no OOS. (did in clean data)
        Rule 3. OOS occurs when both stock and QTY sold are 0, and have sales before and after
        Rule 4. OOS happens when we have sales before. i.e. not new product case
        Rule 5. OOS days consider only the last 7 days
    
    param df: 
        dataframe with columns:
            'date', 'item_name', 'store_name', 'POS Margin on Net Sales (INV)', 'POS Net Sales', 
            'POS Qty Sold', 'Stock Balance Qty'
    return output_data:
        dataframe with header: 
             'item_name', 'store_name', 'category', 'OOS_days', 'date_list', 'OOS_lastDay','avg_loss_sale_quantity',
             'avg_loss_net_sale','avg_loss_INV', 'total_loss_sale_quantity','total_loss_net_sale','total_loss_INV'
    
    """
    # create dataclass
    dataset = Dataset(df=df, store_item_list=get_store_item_list(spark))

    # subtract the last 7 days from current date
    one_weeks_ago = (date_generated[-1] + timedelta(days=1) -
                     timedelta(weeks=1)).date()

    # convert to panda dataframe for checking line by line
    df = df.toPandas()

    # define output directory
    OOS_result = {}
    # begin to check one by one
    for SKU_store in dataset.store_item_list:

        # Establish new status to check given a item in a given store.
        check = 0  # how many days OOS
        last_day_OOS = 0  # will be 1 if OOS in the last day of given dataset
        check_not_new = False  # check if it is the new product in this month
        check_not_removed = False  # check if the producted has been removed
        OOS_date_list = []  # the list to store the date if OOS

        item = SKU_store[0]
        store = SKU_store[1]
        # get subdataset, which contain only info inside
        #sub_data = get_sub_dataset(SKU = item, Store = store, spark = spark)
        sub_data = create_sub_time_series_one_item(df, item, store)
        # Convert to panda for further claculation
        category = sub_data.Category.unique()[-1]

        for index, row in sub_data.iterrows():
            # Rule 4: OS happens when we have sales before
            ## i.e. remove new product case
            ## or the item has only stock, but not sold
            if row['StockQty'] != 0:
                check_not_new = True

            # Rule 3: OOS occurs when both stock and QTY sold are 0 and check_not_new is True
            # OOS occurs when both stock and QTY sold are 0
            if row['StockQty'] == 0 and row['QtySold'] == 0 \
                and check_not_new == True and row['Date'] >= one_weeks_ago:
                check += 1
                OOS_date_list.append(row['Date'])
                #print('Item {} has 0 stock at store {} , Date: {}'.format(item, row['Store'],row['Date']))
                ## check OOS in last day
                last_day_OOS = check_OOS_last_day(
                    df, row['Date'])  # return 1 if OOS in the last days

            # as long as we have stock in this month, we believe this item is not removed from store
            if row['StockQty'] > 0:
                check_not_removed = True

        # When this (item, store) contains the OOS days,
        # and confirmed that this product is not been removed
        if check > 0 and check_not_removed == True:
            key = (item, store)
            loss_INV, loss_NS, loss_QTY = calculate_possible_loss(sub_data)
            OOS_result[key] = (check, loss_INV, loss_NS, loss_QTY,
                               last_day_OOS, OOS_date_list)  # returned value

    ## Output to dataframe
    output_data = out_put_data(OOS_result, category)
    return output_data