def get_embeddings_per_log(data: defaultdict, model: fasttext.FastText) -> np.ndarray:
    """Embed every log line of every block into one flat 2-D array.

    Blocks are visited in the iteration order of ``data.values()`` and the
    logs of each block in their stored order; block boundaries are not
    preserved in the result.

    Args:
        data: Mapping whose values are sequences of log-line strings.
        model: Object providing ``get_sentence_vector(text)`` and
            ``get_dimension()`` (a trained fastText model).

    Returns:
        Array with one row per log line. When there are no log lines at all,
        an empty array of shape ``(0, model.get_dimension())`` is returned so
        that the result is always two-dimensional.
    """
    # Trailing whitespace (in particular the newline at the end of each log
    # line) is stripped before the text is handed to the model.
    embeddings = [
        model.get_sentence_vector(log.rstrip())
        for logs in data.values()
        for log in logs
    ]
    if not embeddings:
        # np.asarray([]) would produce a 1-D float64 array of shape (0,);
        # keep the (n_logs, dim) layout for the empty case as well.
        return np.empty((0, model.get_dimension()), dtype=np.float32)
    return np.asarray(embeddings)
def get_embeddings_per_block(data: defaultdict, model: fasttext.FastText, with_timedelta: bool) -> List:
    """Embed the log lines of each block, keeping one array per block.

    Args:
        data: Mapping whose values are sequences of log-line strings; each
            value is treated as one block.
        model: Object providing ``get_sentence_vector(text)`` and
            ``get_dimension()`` (a trained fastText model).
        with_timedelta: When true, delegate to
            ``get_embeddings_with_timedeltas_per_block`` so that each block
            carries an extra leading timedelta column.

    Returns:
        A list with one 2-D array per block, in the iteration order of
        ``data.values()``. Without timedeltas each array has one row per log
        line; a block with no logs yields an empty array of shape
        ``(0, model.get_dimension())``.
    """
    if with_timedelta:
        return get_embeddings_with_timedeltas_per_block(data, model)

    embeddings = []
    for logs in data.values():
        if logs:
            # Trailing whitespace (notably the newline) is stripped before
            # the text is handed to the model.
            block = np.asarray(
                [model.get_sentence_vector(log.rstrip()) for log in logs]
            )
        else:
            # np.asarray([]) would be 1-D float64 with shape (0,); keep the
            # (n_logs, dim) layout so empty blocks match the timedelta
            # variant, which allocates float32 blocks of shape (0, dim + 1).
            block = np.empty((0, model.get_dimension()), dtype=np.float32)
        embeddings.append(block)
    return embeddings
def get_embeddings_with_timedeltas_per_block(data: defaultdict, model: fasttext.FastText) -> List:
    """Build one float32 matrix per block: timedelta column plus embeddings.

    For every value in ``data`` a matrix of shape
    ``(len(logs), model.get_dimension() + 1)`` is allocated. Column 0 receives
    the result of ``get_timedeltas(logs)``; the remaining columns of row ``i``
    receive the sentence vector of the ``i``-th log line with trailing
    whitespace stripped.

    Args:
        data: Mapping whose values are sequences of log-line strings.
        model: Object providing ``get_sentence_vector(text)`` and
            ``get_dimension()`` (a trained fastText model).

    Returns:
        List of 2-D ``np.float32`` arrays, one per block, in the iteration
        order of ``data.values()``.
    """
    blocks = []
    for block_logs in data.values():
        # One extra leading column is reserved for the timedeltas.
        block = np.zeros(
            shape=(len(block_logs), model.get_dimension() + 1),
            dtype=np.float32,
        )
        # Iterating a 2-D array yields row views, so writes land in `block`.
        for row, line in zip(block, block_logs):
            row[1:] = model.get_sentence_vector(line.rstrip())
        # get_timedeltas is defined elsewhere; presumably it returns one
        # value per log line (or something broadcastable) -- TODO confirm.
        block[:, 0] = get_timedeltas(block_logs)
        blocks.append(block)
    return blocks