def __init__(self, vec_name, data_path=DATA_PATH, out_path=DATA_PATH):
    """
    Load a vecframe from file and verify that it has the expected format.

    :param vec_name: filename of the vecframe to load
    :param data_path: optional, folder the vecframe is loaded from
                      (defaults to DATA_PATH)
    :param out_path: optional, output folder stored on the instance as
                     ``self.out_path`` (defaults to DATA_PATH)
    """
    super().__init__()
    self.vec_name, self.data_path, self.out_path = vec_name, data_path, out_path

    # Validate the loaded frame before exposing it on the instance.
    loaded = load_frame(vec_name, data_path)
    check_if_vecframe(loaded)
    self.vecframe = loaded
def dump_vecframe(self, vf, appendix=None, in_csv=False):
    """
    Dump the resulting vecframe into ``self.out_path``.

    :param vf: vecframe to write; it is validated with check_if_vecframe first
    :param appendix: optional suffix; when given, the file is named
                     "<vec_name>_<appendix>", otherwise ``self.out_name`` is used
    :param in_csv: forwarded unchanged to dump_frame
    :return: name of the file created
    """
    check_if_vecframe(vf)

    if appendix is None:
        # NOTE(review): out_name is not assigned in the visible constructor;
        # presumably a subclass or caller sets it - confirm, otherwise this
        # branch raises AttributeError.
        outname = self.out_name
    else:
        # format() yields the same name as "+" for strings and additionally
        # accepts non-string suffixes (e.g. an epoch number).
        outname = "{}_{}".format(self.vec_name, appendix)

    dump_frame(vf, outname, self.out_path, in_csv=in_csv)
    return outname
def __init__(self, vf_fname, time_dim=0, in_datapath=DATA_PATH):
    """
    Load the input vecframe from file.

    :param vf_fname: filename of the vecframe
    :param time_dim: optional; when falsy (default 0) the file is read with
                     load_frame and validated as a vecframe, otherwise it is
                     read through load_frame_as_3d_nparray without validation
    :param in_datapath: optional, folder the file is loaded from
                        (defaults to DATA_PATH)
    """
    super().__init__()
    self.vf_fname = vf_fname
    self.in_datapath = in_datapath

    if time_dim:
        # Time-dimension variant: the loader's result is stored as-is.
        self.vecframe = load_frame_as_3d_nparray(vf_fname, data_path=in_datapath)
        return

    frame = load_frame(vf_fname, in_datapath)
    check_if_vecframe(frame)
    self.vecframe = frame
def make_weekly_vecframe(step_name, vec_name='{}_week', data_path=DATA_PATH):
    """
    Transform a stepframe into a vecframe without splitting the data.

    Every stepframe column from '0' onwards becomes one row of the vecframe;
    'desc' is always 0.

    :param step_name: name of the stepframe
    :param vec_name: name pattern under which the vecframe is saved; it is
                     formatted with step_name
    :param data_path: optional, path to the data folder, used for both loading
                      the stepframe and saving the vecframe
    :return: None, the vecframe is written with dump_frame
    """
    # Load from the caller-supplied folder. The DATA_PATH constant was used
    # here before, which silently ignored a custom data_path on load.
    stepframe = load_frame(step_name, data_path)

    vecframe = stepframe.loc[:, '0':].transpose()
    vecframe.columns = [str(col) for col in vecframe.columns]

    # After the transpose the index holds the former column labels, which are
    # converted to int and used as the 'user' column.
    vecframe['user'] = vecframe.index
    vecframe['user'] = vecframe['user'].apply(int)
    vecframe['desc'] = 0

    # Move the two freshly appended columns ('user', 'desc') to the front.
    cols = list(vecframe.columns)
    vecframe = vecframe[cols[-2:] + cols[:-2]]

    check_if_vecframe(vecframe)
    dump_frame(vecframe, vec_name.format(step_name), data_path)
def daySplitter(step_name, data_path=DATA_PATH):
    """
    Split the entries of a stepframe by day and save the result as a vecframe.

    For every distinct value in the 'day' column, the rows of that day are
    transposed so that each selected column becomes one row, tagged with the
    column label as 'user' and the day as 'desc'. The result is written under
    the name "<step_name>_dsp".

    :param step_name: name of the stepframe
    :param data_path: optional, folder the stepframe is loaded from
    :return: None, the vecframe is written with dump_frame
    """
    stepframe = load_frame(step_name, data_path)
    check_if_stepframe(stepframe)

    # The number of rows for day 0 defines the vector length for every day.
    vec_len = stepframe.loc[stepframe.day == 0].shape[0]
    columns = ['user', 'desc'] + list(range(vec_len))

    vfs = []
    for day in stepframe.day.unique():
        # Skips the first four columns and keeps at most 999 of the rest.
        # NOTE(review): presumably four metadata columns followed by one
        # column per user, capped at 999 - confirm.
        vf = stepframe[stepframe.day == day].iloc[:, 4:999 + 4].T.astype('int32')
        vf.columns = list(range(vec_len))
        # Builtin int replaces pd.np.int: the pandas.np alias and numpy.int
        # were removed from current releases; numpy.int was an alias of int.
        vf['user'] = vf.index.to_numpy(dtype=int)
        vf['desc'] = day
        vfs.append(vf)

    vecframe = pd.concat(vfs, sort=False, ignore_index=True)
    vecframe = vecframe[columns]
    vecframe.columns = vecframe.columns.astype(str)

    check_if_vecframe(vecframe)
    # NOTE(review): the dump uses dump_frame's default folder rather than
    # data_path - confirm whether that is intended.
    dump_frame(vecframe, '{}_dsp'.format(step_name))
[seq_len, 1, 1]) out, emb = model(row_tens) emb = torch.squeeze(emb) embArr.append(head + (emb.data.tolist())) outArr.append(head + (out.data.tolist())) emb_df = pd.DataFrame(data=embArr, columns=['user', 'desc'] + [str(i) for i in range(len(embArr[0]) - 2)]) out_df = pd.DataFrame(data=outArr, columns=['user', 'desc'] + [str(i) for i in range(len(outArr[0]) - 2)]) print(out_df.shape, emb_df.shape) print(out_df.head(3)) print(emb_df.head(3)) check_if_vecframe(out_df) check_if_vecframe(emb_df) outname = in_dsp_fname + "_LSTM_out_" + str(epochs) + "_" + str(batchsize) embname = in_dsp_fname + "_LSTM_emb_" + str(epochs) + "_" + str(batchsize) dump_frame(frame=out_df, name=outname, in_csv=True) dump_frame(frame=emb_df, name=embname, in_csv=True) print("written emb and out to disk in ", outname, embname)