Example #1
0
 def test_convert_row_to_dict(self):
     """Check that nested Rows stay reachable through ``asDict()``.

     The list field is checked on a locally built Row and again on the Row
     read back through a SQL query over a temp table; the map field is
     checked only on the queried Row.
     """
     nested = Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")})
     self.assertEqual(1, nested.asDict()['l'][0].a)

     # Push the row through a DataFrame and query it back with SQL.
     frame = self.sc.parallelize([nested]).toDF()
     frame.registerTempTable("test")
     fetched = self.sqlCtx.sql("select l, d from test").head()

     as_dict = fetched.asDict()
     self.assertEqual(1, as_dict["l"][0].a)
     self.assertEqual(1.0, as_dict['d']['key'].c)
Example #2
0
 def take_log_in_all_columns(row: types.Row):
     """Return a new Row holding the natural log of every field of ``row``.

     Each output field is named ``log(<original field name>)``. Values are
     handed to ``math.log`` unchanged, so any error it raises propagates.
     """
     logged = {}
     for name, number in row.asDict().items():
         logged[f'log({name})'] = math.log(number)
     return types.Row(**logged)
Example #3
0
def nulls(row: T.Row) -> T.Row:
    """Return a copy of ``row`` with an added ``nullcnt`` field.

    ``nullcnt`` is the number of fields of ``row`` whose value is ``None``.
    """
    fields = row.asDict()
    # Count before inserting the new key so 'nullcnt' never counts itself.
    fields['nullcnt'] = sum(1 for value in fields.values() if value is None)
    return T.Row(**fields)
Example #4
0
def mse(row: T.Row) -> T.Row:
    """Return a copy of ``row`` with an added ``mse`` (squared error) field.

    The value stored under ``mse`` is:

    * ``0.0`` when ``Sales_Pred`` is ``None`` (a diagnostic line is printed),
    * ``Sales_Pred ** 2`` when ``sales`` is ``None``,
    * ``(Sales_Pred - sales) ** 2`` otherwise.
    """
    d = row.asDict()
    predicted = d['Sales_Pred']
    if predicted is None:
        print("'Sales_Pred'=None")
        # Use a float here (the original stored the int 0) so this branch
        # yields the same numeric type as the 0.0 default. Mixing int and
        # float in one column can break a Spark double column downstream.
        squared_error = 0.0
    elif d['sales'] is None:
        squared_error = predicted**2
    else:
        squared_error = (predicted - d['sales'])**2
    d['mse'] = squared_error
    return T.Row(**d)
 def getMapColumnQuery(row: Row) -> str:
     """Build the SELECT-list expression for one column-mapping row.

     ``row`` must provide the fields ``xpath`` and ``column_alias``. The
     alias is stripped and its spaces are replaced with underscores.

     * If the xpath is unusable (null, blank, the text ``'None'``, or
       containing ``'?'``) the result is ``CAST(NULL AS STRING) AS <alias>``.
     * Otherwise, when ``_xpath_return_type`` equals ``'string'``, the XPATH
       results are joined with ``_sep.value``, trimmed and cast to STRING.
     * Otherwise the raw ``XPATH(line, ...)`` expression is returned.

     ``_xpath_return_type`` and ``_sep`` are read from the enclosing scope.
     """
     row_dict = row.asDict()
     # str() turns a null xpath into 'None', which is treated as unusable
     # below. The original's extra `str(...) is None` test could never be
     # true and has been dropped.
     xpath_text = str(row_dict["xpath"]).strip()
     alias = str(row_dict["column_alias"]).strip().replace(" ", "_")

     if xpath_text in ('', 'None') or '?' in xpath_text:
         return f'CAST(NULL AS STRING) AS {alias}'

     # Use `==` rather than calling __eq__ directly: the dunder can return
     # the truthy NotImplemented for non-str values and pick this branch.
     if _xpath_return_type == 'string':
         return f'CAST(TRIM(CONCAT_WS("{_sep.value}", XPATH(line, "{row_dict["xpath"]}"))) AS STRING) AS {alias}'
     return f'XPATH(line, "{row_dict["xpath"]}") AS {alias}'
    def getColumnsAndSerdeXpath(row: Row) -> dict:
        """Build the DDL column fragment and SerDe xpath property for a row.

        ``row`` must provide the field ``column_alias`` and, when the alias is
        usable, ``xpath``. The alias is stripped and its spaces are replaced
        with underscores.

        Returns a dict with two string entries:

        * ``'column_datatype'`` - ``<alias> ARRAY<STRING>``
        * ``'serde_property'`` - ``"column.xpath.<alias>"="<xpath>"``

        Both entries are empty strings when the alias is null or blank (a
        notice is printed). When the xpath is null or blank, the placeholder
        ``/PleaseCorrectTheXpath/@InvalidColumn`` is emitted in its place.
        """
        row_dict = row.asDict()

        alias = row_dict["column_alias"]
        # Test the raw value for None: str(None) is the text 'None', so the
        # original `str(...) is None` check could never fire.
        if alias is None or not str(alias).strip():
            print('Skipping empty/null column name')
            return {
                'column_datatype': "",
                'serde_property': ""
            }

        column_name = str(alias).strip().replace(" ", "_")

        xpath = row_dict["xpath"]
        # The original had these two branches swapped: a blank xpath was
        # emitted as-is and a real xpath was replaced by the placeholder.
        if xpath is None or not str(xpath).strip():
            serde_xpath = "/PleaseCorrectTheXpath/@InvalidColumn"
        else:
            serde_xpath = xpath

        return {
            'column_datatype': f'{column_name} ARRAY<STRING>',
            'serde_property': f'"column.xpath.{column_name}"="{serde_xpath}"'
        }
Example #7
0
def keyvalues(row: T.Row) -> ((str, str), float):
    """Split ``row`` into a ``(key, value)`` pair.

    The key is the 3-tuple ``(store_id, dept_id, year)`` read from the row,
    and the value is the row's ``mse`` field.
    """
    fields = row.asDict()
    group = tuple(fields[name] for name in ("store_id", "dept_id", "year"))
    return group, fields["mse"]
Example #8
0
 def astraining(row: Row) -> Row:
     """Turn ``row`` into a ``Row(label=..., features=...)`` record.

     ``label`` is the row's ``sales`` value. ``features`` is the list of all
     remaining field values, in ``asDict()`` order, with ``Sales_Pred`` and
     ``sales`` removed.
     """
     remaining = row.asDict()
     del remaining['Sales_Pred']
     # pop() both removes the target from the features and hands it back.
     target = remaining.pop('sales')
     return Row(label=target, features=list(remaining.values()))