'''
Created on 30 Apr 2019

@author: ostlerr
'''
import os
from imageToText.YieldBookToData import getPageScan, correctWords, removePunctuation
import configparser
import re

# Load the run configuration. 'config.ini' is a relative path, so it is looked
# up in the current working directory. ConfigParser.read() skips files it
# cannot open, so a missing file shows up as a KeyError on the lookup below.
config = configparser.ConfigParser()
config.read('config.ini')
# NOTE(review): the two settings below are disabled, yet loopDocs() still
# refers to a global named `experiment` -- confirm where it is meant to be set.
#experiment = config['EXPERIMENT']['name']
#outfile = open(config['EXPERIMENT']['outfile'], "w+", 1)
srcdocs = config['EXPERIMENT']['srcdocs']

# Directory listing in sorted name order.
fileList = sorted(os.listdir(srcdocs))

# Print the result of getPageScan for every entry in the source directory.
for fname in fileList:
    # The file name prefix is split into a 4-character and a 2-character
    # field; presumably year and page number -- confirm the naming scheme.
    nyear = fname[0:4]
    npage = fname[4:6]
    print(nyear + " - " + npage)
    # os.path.join uses the host OS separator instead of a hard-coded
    # Windows backslash, so the script also works on POSIX systems.
    page = getPageScan(os.path.join(srcdocs, fname))
    print(page)
def loopDocs():
    """Extract metadata sections and sponsors from every ``.jpg`` page scan.

    Each file in ``srcdocs`` whose name ends with ``.jpg`` is visited in
    sorted name order. Its text is obtained from ``getPageScan``, split on
    single spaces and passed through ``correctWords`` together with the
    global ``corrections``. The corrected text is then searched for the
    headings ``Object:``, ``Design:``, ``Plot dimensions:`` (matched
    case-insensitively) and ``Treatments:``; for each heading found, the
    matching section is obtained from ``trimPage(text, start, end)`` and
    stored on a fresh ``Metadata`` instance.

    Output:
        * For a page containing ``Object:``, one pipe-delimited line is
          written to ``metadataOutfile``: experiment, year (first four
          characters of the file name), field, object, design, wholeplots,
          subplots, treatments.
        * For a page containing ``Sponsors:``, one comma-delimited line
          (experiment, year, sponsor) is written to ``sponsorOutfile`` for
          each item returned by ``getSponsors``.

    Progress and extracted sections are also printed to stdout.

    Relies on names that are not defined in the visible part of this file
    (``corrections``, ``Metadata``, ``trimPage``, ``identifyField``,
    ``getSponsors``, ``experiment``, ``metadataOutfile``,
    ``sponsorOutfile``); they must exist as module globals when this
    function is called.
    """
    # NOTE(review): `year` is not read or assigned in the code shown here;
    # confirm whether this declaration is still needed.
    global year
    fileList = sorted(os.listdir(srcdocs))
    for fname in fileList:
        nyear = fname[0:4]
        #if int(nyear) >= 1992 and int(nyear) <= 2006 and fname.endswith(".jpg"):
        if fname.endswith(".jpg"):
            # os.path.join uses the host OS separator instead of a
            # hard-coded Windows backslash.
            rawPage = getPageScan(os.path.join(srcdocs, fname))
            print("RAWPAGE: [" + rawPage + "]")
            # Splitting on " " only (not on newlines) keeps line breaks
            # inside the tokens handed to correctWords.
            # Assumes correctWords returns the corrected page as one string;
            # the substring tests below depend on that -- TODO confirm.
            rawPage = correctWords(rawPage.split(" "), corrections)
            metadata = Metadata()
            # Only pages with an "Object:" heading produce a metadata line.
            hasMetadata = False
            if "Object:" in rawPage:
                hasMetadata = True
                page = trimPage(rawPage, "Object:", "Sponsors:")
                metadata.object = page.replace("\n", " ")
                print("OBJECT: [" + metadata.object + "]")
                metadata.field = identifyField(metadata.object)
                print("FIELD: [" + metadata.field + "]")
            if "Design:" in rawPage:
                page = trimPage(rawPage, "Design:", "Plot dimensions")
                metadata.design = page.replace("\n", " ")
                print("DESIGN: [" + metadata.design + "]")

            # NOTE(review): the heading is detected case-insensitively, but
            # trimPage receives the mixed-case marker; confirm that trimPage
            # copes with other capitalisations.
            if "plot dimensions:" in rawPage.lower():
                page = trimPage(rawPage, "Plot dimensions:", "Treatments")
                metadata.wholeplots = page.replace("\n", " ")
                print("DIMENSIONS: [" + metadata.wholeplots + "]")
                # TODO: sub-plot dimensions are not extracted here, so
                # metadata.subplots is never assigned by this function.

            if "Treatments:" in rawPage:
                page = trimPage(rawPage, "Treatments:", "Experimental diary")
                # Backslash + space marks a markdown paragraph break. The
                # backslash is escaped explicitly: the bare "\ " form is an
                # invalid escape sequence that newer Pythons warn about.
                metadata.treatments = page.replace("\n", "\\ ")
                print("TREATMENTS: [" + metadata.treatments + "]")

            if hasMetadata:
                # design / wholeplots / subplots / treatments are only set
                # above when their heading was found; presumably Metadata()
                # supplies string defaults for them -- TODO confirm.
                metadataOutfile.write("|".join([
                    experiment,
                    str(nyear),
                    metadata.field,
                    metadata.object,
                    metadata.design,
                    metadata.wholeplots,
                    metadata.subplots,
                    metadata.treatments,
                ]) + "\n")
            if "Sponsors:" in rawPage:  # or page.find("$$Seed") > -1:
                #page = page.replace("$$", " ")
                page = trimPage(rawPage, "Sponsors:", "The")
                page = page.replace("\n", " ")
                print(page)
                sponsors = getSponsors(page)
                for sponsor in sponsors:
                    sponsorOutfile.write(experiment + "," + str(nyear) + "," +
                                         sponsor + "\n")