Ejemplo n.º 1
0
        # Read one recipe file: extract the <title> and <url> values from
        # their tags and accumulate the raw lines into recipeContent, then
        # record the result as a [title, url, recipeContent] row.
        #
        # The context manager closes the handle even if reading or a regex
        # call raises, and the handle is no longer named `input`, so the
        # builtin is not shadowed.
        #
        # NOTE(review): title and url are only assigned when a matching tag
        # line is seen; for a file without one they keep whatever value they
        # had before this point -- confirm they are reset by the caller.
        with open(file, "r") as recipeFile:
            for line in recipeFile:
                if "<title>" in line:
                    # One pass removes the leading opening tag and any closing
                    # tag together with its surrounding whitespace/newline.
                    title = re.sub(r'^\s*<title>|\s*</title>\s*', '', line)
                # NOTE(review): each line is appended to recipeContent twice
                # (here and again after the <url> check). This looks like a
                # copy-paste leftover; it is preserved because data already
                # produced by this script contains the duplication -- confirm
                # and drop one of the two appends.
                recipeContent = recipeContent + line
                if "<url>" in line:
                    url = re.sub(r'^\s*<url>|\s*</url>\s*', '', line)
                recipeContent = recipeContent + line
        recipeDataArray.append([title, url, recipeContent])

    # Progress marker with the current wall-clock time.
    print('# starting to calculate embeddings: ',
          time.strftime('%H:%M:%S'),
          sep='')

    # Attach an embedding to every recipe row, computed from the entry
    # selected by recipeField; the embedding becomes the row's last element.
    for recipeRow in recipeDataArray:
        recipeRow.append(obj.getFlairEmbedding(recipeRow[recipeField]))

    # Persist the recipe rows to recipeDataArray.p under recipeDirectory.
    # The context manager guarantees the file is flushed and closed once the
    # dump finishes (or fails), instead of leaving that to garbage collection.
    with open(recipeDirectory + "recipeDataArray.p", "wb") as pickleFile:
        pickle.dump(recipeDataArray, pickleFile)

# Final progress marker with the current wall-clock time.
print('# finished: ', time.strftime('%H:%M:%S'), sep='')