def _upload_to_hive(self, logs):
    """Insert ``logs`` into the table named by ``self.destination_table``.

    The write is issued through ``DataFrameWriter.insertInto`` with
    ``overwrite=True``.

    Args:
        logs: The object handed to ``DataFrameWriter``; presumably a Spark
            DataFrame whose columns line up with the destination table --
            TODO confirm against the caller.
    """
    # NOTE(review): this context is never used afterwards. It is still
    # constructed in case creating it has side effects -- confirm, then drop.
    sql_context = HiveContext()

    # Update the Hive table. These statements must sit on their own lines:
    # when they trail an inline ``#`` they become comment text and never run.
    df_writer = DataFrameWriter(logs)
    df_writer.insertInto(self.destination_table, overwrite=True)
lambda a, b: a + b).map(lambda x: tfin(x)) # In[27]: #rdd2.take(10) # In[23]: tdf2 = hiveContext.createDataFrame(rdd2, ['uid', 'loc_ts_dur', 'data_dt']) # In[24]: #tdf2.collect() # In[25]: df_writer = DataFrameWriter(tdf2) df_writer.insertInto(oHiveTable, overwrite=True) # In[26]: sc.stop() # In[31]: #toLocDurTuples('101,2016-06-01@12:04:02,[40:50][202:203],20160601') # In[32]: #tfin(('101_20160601', [(202, 1464782642, 40), (203, 1464782682, 50)]))