Beispiel #1
0
def main():
    """Parse the raw emails of one database and store them in another.

    Command-line arguments:
        -i / --input_db: name of the database holding the raw emails;
            ``.db`` is appended to it. Required.
        -o / --output_db: name of the database receiving the parsed
            emails; ``.db`` is appended to it. Defaults to
            ``data_storage``.
        --emails_table: table name used both to read the raw emails and
            to store the parsed ones. Defaults to ``emails``.
    """
    # Parse cli arguments
    arg_parser = argparse.ArgumentParser(
        description="""This is the script responsible for parsing the emails"""
    )
    required = arg_parser.add_argument_group("required arguments")
    optional = arg_parser.add_argument_group("optional arguments")

    required.add_argument("-i",
                          "--input_db",
                          help="Input .db file name of the raw emails",
                          required=True)
    # Not marked as required: the option is documented as optional and
    # carries a default, which argparse would never apply otherwise.
    optional.add_argument(
        "-o",
        "--output_db",
        default="data_storage",
        help=
        "Output .db file name of the parsed emails (default is  data_storage)",
    )
    optional.add_argument(
        "--emails_table",
        default="emails",
        help=
        "Name of the table where we will store the parsed emails and of the table where the raw emails are stored (default is emails)",
    )

    args = arg_parser.parse_args()
    input_db = args.input_db
    output_db = args.output_db
    emails_table = args.emails_table

    # input
    raw_emails_data = Database(f"{input_db}.db").get_dataframe(emails_table)
    # output
    data_storage = Database(f"{output_db}.db")
    try:
        data_storage.create_emails_table(table_name=emails_table)
        # EmailParser (kept under its own name so it does not shadow the
        # argument parser above)
        print("Let's create an EmailParser")
        email_parser = ParserFactory.get_parser("Email")
        email_parser.parse_dataframe(raw_emails_data,
                                     db=data_storage,
                                     emails_table_name=emails_table)
        print(f"Data from {input_db}.db parsed and saved on {output_db}.db")
    finally:
        # Release the output connection even when table creation or
        # parsing fails.
        data_storage.close_connection()
def test_db():
    """Yield a ``Database`` on ``test.db`` with a ``test_table`` emails table.

    After the consumer resumes the generator, the connection is closed and
    the ``test.db`` file under ``config.DATA_DIR`` is deleted.
    """
    table_name = "test_table"
    database = Database("test.db", table_name)
    database.create_emails_table(table_name)

    yield database

    # Teardown: close first, then remove the file left on disk.
    database.close_connection()
    db_file = config.DATA_DIR + "test.db"
    os.remove(db_file)